dockonsurf / modules / formats.py @ 1d8c374e
Historique | Voir | Annoter | Télécharger (10,51 ko)
1 | fd2384fc | Carles Marti | """Module for the interconversion between different kinds of atomic data.
|
---|---|---|---|
2 | e23f119b | Carles |
|
3 | e23f119b | Carles | functions:
|
4 | fd2384fc | Carles Marti | confs_to_mol_list: Converts the conformers inside a rdkit.Mol object to a list
|
5 | fd2384fc | Carles Marti | of separate rdkit.Mol objects.
|
6 | fd2384fc | Carles Marti | rdkit_mol_to_ase_atoms: Converts a rdkit.Mol object into ase.Atoms object.
|
7 | fd2384fc | Carles Marti | add_special_atoms: Allows ase to use custom elements with symbols not in the
|
8 | fd2384fc | Carles Marti | periodic table.
|
9 | b6f47f2d | Carles | adapt_format: Converts the coordinate files into a required library object type.
|
10 | fd2384fc | Carles Marti | read_coords_cp2k: Reads the coordinates from a CP2K restart file and returns an
|
11 | fd2384fc | Carles Marti | ase.Atoms object.
|
12 | fd2384fc | Carles Marti | collect_coords: Directs the reading of coordinates on a set of subdirectories.
|
13 | fd2384fc | Carles Marti | read_energy_cp2k: Reads the CP2K out file and returns its final energy.
|
14 | fd2384fc | Carles Marti | collect_energies: Directs the reading of energies on a set of subdirectories.
|
15 | e23f119b | Carles | """
|
16 | e23f119b | Carles | |
17 | e23f119b | Carles | import logging |
18 | f3004731 | Carles | |
19 | f3004731 | Carles | import rdkit.Chem.AllChem as Chem |
20 | f3004731 | Carles | |
21 | 89a980fc | Carles | logger = logging.getLogger('DockOnSurf')
|
22 | e23f119b | Carles | |
23 | e23f119b | Carles | |
24 | f3004731 | Carles | def confs_to_mol_list(mol: Chem.rdchem.Mol, idx_lst=None): |
25 | f3004731 | Carles | """Converts the conformers inside a rdkit mol object to a list of
|
26 | f3004731 | Carles | separate mol objects.
|
27 | f3004731 | Carles |
|
28 | f3004731 | Carles | @param mol: rdkit mol object containing at least one conformer.
|
29 | 9510666f | Carles | @param idx_lst: list of conformer indices to be considered. If not passed,
|
30 | 9510666f | Carles | all conformers are considered.
|
31 | f3004731 | Carles | @return: list of separate mol objects.
|
32 | f3004731 | Carles | """
|
33 | f3004731 | Carles | if idx_lst is None: |
34 | f3004731 | Carles | idx_lst = list(range(mol.GetNumConformers())) |
35 | f02e613b | Carles Marti | return [Chem.MolFromMolBlock(
|
36 | f02e613b | Carles Marti | Chem.MolToMolBlock(mol, confId=int(idx)).replace("3D", ""), |
37 | f02e613b | Carles Marti | removeHs=False) for idx in idx_lst] |
38 | f3004731 | Carles | |
39 | f3004731 | Carles | |
40 | f3004731 | Carles | def rdkit_mol_to_ase_atoms(mol: Chem.rdchem.Mol): |
41 | f3004731 | Carles | """Converts a rdkit mol object into ase Atoms object.
|
42 | f3004731 | Carles | @param mol: rdkit mol object containing only one conformer.
|
43 | f3004731 | Carles | @return ase.Atoms: ase Atoms object with the same coordinates.
|
44 | f3004731 | Carles | """
|
45 | f3004731 | Carles | from ase import Atoms |
46 | 4933cb8a | Carles Martí | if mol.GetNumConformers() > 1: |
47 | 9510666f | Carles | logger.warning('A mol object with multiple conformers is parsed, '
|
48 | 695dcff8 | Carles Marti | 'converting to Atoms only the first conformer.')
|
49 | f3004731 | Carles | symbols = [atm.GetSymbol() for atm in mol.GetAtoms()] |
50 | f3004731 | Carles | positions = mol.GetConformer(0).GetPositions()
|
51 | f3004731 | Carles | return Atoms(symbols=symbols, positions=positions)
|
52 | f3004731 | Carles | |
53 | f3004731 | Carles | |
54 | cc92f6ee | Carles Marti | def add_special_atoms(symbol_pairs): |
55 | fd2384fc | Carles Marti | """Allows ase to use custom elements with symbols not in the periodic table.
|
56 | cc92f6ee | Carles Marti |
|
57 | cc92f6ee | Carles Marti | This function adds new chemical elements to be used by ase. Every new custom
|
58 | cc92f6ee | Carles Marti | element must have a traditional (present in the periodic table) partner
|
59 | cc92f6ee | Carles Marti | from which to obtain all its properties.
|
60 | fd2384fc | Carles Marti | @param symbol_pairs: List of tuples containing pairs of chemical symbols.
|
61 | cc92f6ee | Carles Marti | Every tuple contains a pair of chemical symbols, the first label must be
|
62 | cc92f6ee | Carles Marti | the label of the custom element and the second one the symbol of the
|
63 | cc92f6ee | Carles Marti | reference one (traditional present on the periodic table).
|
64 | cc92f6ee | Carles Marti | @return:
|
65 | 587dca22 | Carles Marti | """ # TODO Enable special atoms for rdkit |
66 | cc92f6ee | Carles Marti | import numpy as np |
67 | cc92f6ee | Carles Marti | from ase import data |
68 | cc92f6ee | Carles Marti | for i, pair in enumerate(symbol_pairs): |
69 | cc92f6ee | Carles Marti | data.chemical_symbols += [pair[0]]
|
70 | cc92f6ee | Carles Marti | z_orig = data.atomic_numbers[pair[1]]
|
71 | cc92f6ee | Carles Marti | orig_iupac_mass = data.atomic_masses_iupac2016[z_orig] |
72 | cc92f6ee | Carles Marti | orig_com_mass = data.atomic_masses_common[z_orig] |
73 | cc92f6ee | Carles Marti | data.atomic_numbers[pair[0]] = max(data.atomic_numbers.values()) + 1 |
74 | cc92f6ee | Carles Marti | data.atomic_names += [pair[0]]
|
75 | cc92f6ee | Carles Marti | data.atomic_masses_iupac2016 = np.append(data.atomic_masses_iupac2016, |
76 | cc92f6ee | Carles Marti | orig_iupac_mass) |
77 | cc92f6ee | Carles Marti | data.atomic_masses = data.atomic_masses_iupac2016 |
78 | cc92f6ee | Carles Marti | data.atomic_masses_common = np.append(data.atomic_masses_common, |
79 | cc92f6ee | Carles Marti | orig_com_mass) |
80 | cc92f6ee | Carles Marti | data.covalent_radii = np.append(data.covalent_radii, |
81 | cc92f6ee | Carles Marti | data.covalent_radii[z_orig]) |
82 | cc92f6ee | Carles Marti | data.reference_states += [data.reference_states[z_orig]] |
83 | 90819cc3 | Carles Marti | # TODO Add vdw_radii, gsmm and aml (smaller length)
|
84 | cc92f6ee | Carles Marti | |
85 | cc92f6ee | Carles Marti | |
86 | 90819cc3 | Carles Marti | def adapt_format(requirement, coord_file, spec_atms=tuple()): |
87 | e23f119b | Carles | """Converts the coordinate files into a required library object type.
|
88 | e23f119b | Carles |
|
89 | e23f119b | Carles | Depending on the library required to use and the file type, it converts the
|
90 | e23f119b | Carles | coordinate file into a library-workable object.
|
91 | e23f119b | Carles | @param requirement: str, the library for which the conversion should be
|
92 | e23f119b | Carles | made. Accepted values: 'ase', 'rdkit'.
|
93 | e23f119b | Carles | @param coord_file: str, path to the coordinates file aiming to convert.
|
94 | e23f119b | Carles | Accepted file tyoes: 'xyz', 'mol'.
|
95 | 90819cc3 | Carles Marti | @param spec_atms: List of tuples containing pairs of new/traditional
|
96 | 90819cc3 | Carles Marti | chemical symbols.
|
97 | e23f119b | Carles | @return: an object the required library can work with.
|
98 | 748a6036 | Carles Martí | """
|
99 | 439ce5f7 | Carles | import ase.io |
100 | 8ab593ee | Carles | from ase.io.formats import filetype |
101 | 8ab593ee | Carles | |
102 | 748a6036 | Carles Martí | from modules.utilities import try_command |
103 | 748a6036 | Carles Martí | |
104 | 8ab593ee | Carles | req_vals = ['rdkit', 'ase'] |
105 | 4381145e | Carles | lib_err = f"The conversion to the '{requirement}' library object type" \
|
106 | 4381145e | Carles | f" has not yet been implemented"
|
107 | 4381145e | Carles | conv_info = f"Converted {coord_file} to {requirement} object type"
|
108 | 4381145e | Carles | |
109 | 748a6036 | Carles Martí | fil_type_err = f'The {filetype(coord_file)} file format is not supported'
|
110 | 4381145e | Carles | |
111 | 4381145e | Carles | if requirement not in req_vals: |
112 | 9f7bb440 | Carles | logger.error(lib_err) |
113 | 4381145e | Carles | raise NotImplementedError(lib_err) |
114 | 4381145e | Carles | |
115 | 8ab593ee | Carles | if requirement == 'rdkit': |
116 | 748a6036 | Carles Martí | from modules.xyz2mol import xyz2mol |
117 | 748a6036 | Carles Martí | if filetype(coord_file) == 'xyz': # TODO Include detection of charges. |
118 | 439ce5f7 | Carles | ase_atms = ase.io.read(coord_file) |
119 | 439ce5f7 | Carles | atomic_nums = ase_atms.get_atomic_numbers().tolist() |
120 | 439ce5f7 | Carles | xyz_coordinates = ase_atms.positions.tolist() |
121 | 748a6036 | Carles Martí | rd_mol_obj = xyz2mol(atomic_nums, xyz_coordinates) |
122 | 8d08beb4 | Carles | logger.debug(conv_info) |
123 | 21e2cca5 | Carles Marti | return Chem.AddHs(rd_mol_obj)
|
124 | 8ab593ee | Carles | elif filetype(coord_file) == 'mol': |
125 | 8d08beb4 | Carles | logger.debug(conv_info) |
126 | 21e2cca5 | Carles Marti | return Chem.AddHs(Chem.MolFromMolFile(coord_file, removeHs=False)) |
127 | 748a6036 | Carles Martí | else:
|
128 | 748a6036 | Carles Martí | ase_atms = try_command(ase.io.read, |
129 | 748a6036 | Carles Martí | [(ase.io.formats.UnknownFileTypeError, |
130 | 748a6036 | Carles Martí | fil_type_err)], |
131 | 748a6036 | Carles Martí | coord_file) |
132 | 748a6036 | Carles Martí | atomic_nums = ase_atms.get_atomic_numbers().tolist() |
133 | 748a6036 | Carles Martí | xyz_coordinates = ase_atms.positions.tolist() |
134 | 748a6036 | Carles Martí | return xyz2mol(atomic_nums, xyz_coordinates)
|
135 | 8ab593ee | Carles | |
136 | 8ab593ee | Carles | if requirement == 'ase': |
137 | cc92f6ee | Carles Marti | add_special_atoms(spec_atms) |
138 | 21e2cca5 | Carles Marti | if filetype(coord_file) == 'xyz': |
139 | 21e2cca5 | Carles Marti | logger.debug(conv_info) |
140 | 21e2cca5 | Carles Marti | return ase.io.read(coord_file)
|
141 | 21e2cca5 | Carles Marti | elif filetype(coord_file) == 'mol': |
142 | 21e2cca5 | Carles Marti | logger.debug(conv_info) |
143 | 21e2cca5 | Carles Marti | rd_mol = Chem.AddHs(Chem.MolFromMolFile(coord_file, removeHs=False))
|
144 | 21e2cca5 | Carles Marti | return rdkit_mol_to_ase_atoms(rd_mol)
|
145 | 748a6036 | Carles Martí | else:
|
146 | 748a6036 | Carles Martí | return try_command(ase.io.read,
|
147 | 748a6036 | Carles Martí | [(ase.io.formats.UnknownFileTypeError, |
148 | 748a6036 | Carles Martí | fil_type_err)], |
149 | 748a6036 | Carles Martí | coord_file) |
150 | b6f47f2d | Carles | |
151 | b6f47f2d | Carles | |
152 | ed01b5b7 | Carles Marti | def read_coords_cp2k(file, spec_atoms=tuple()): |
153 | ed01b5b7 | Carles Marti | """Reads the coordinates from a CP2K restart file and returns an ase.Atoms
|
154 | ed01b5b7 | Carles Marti | object.
|
155 | ed01b5b7 | Carles Marti |
|
156 | ed01b5b7 | Carles Marti | @param file: The file to read containing the coordinates.
|
157 | ed01b5b7 | Carles Marti | @param spec_atoms: List of tuples containing the pairs of chemical symbols.
|
158 | ed01b5b7 | Carles Marti | @return: ase.Atoms object of the coordinates in the file.
|
159 | ed01b5b7 | Carles Marti | """
|
160 | ed01b5b7 | Carles Marti | import numpy as np |
161 | ed01b5b7 | Carles Marti | from ase import Atoms |
162 | ed01b5b7 | Carles Marti | from pycp2k import CP2K |
163 | ed01b5b7 | Carles Marti | |
164 | ed01b5b7 | Carles Marti | cp2k = CP2K() |
165 | ed01b5b7 | Carles Marti | cp2k.parse(file)
|
166 | ed01b5b7 | Carles Marti | force_eval = cp2k.CP2K_INPUT.FORCE_EVAL_list[0]
|
167 | ed01b5b7 | Carles Marti | raw_coords = force_eval.SUBSYS.COORD.Default_keyword |
168 | ed01b5b7 | Carles Marti | symbols = [atom.split()[0] for atom in raw_coords] |
169 | 1d8c374e | Carles Martí | positions = np.array([[float(coord) for coord in atom.split()[1:]] |
170 | 1d8c374e | Carles Martí | for atom in raw_coords]) |
171 | ed01b5b7 | Carles Marti | if len(spec_atoms) > 0: |
172 | 4387ce69 | Carles Marti | add_special_atoms(spec_atoms) |
173 | ed01b5b7 | Carles Marti | return Atoms(symbols=symbols, positions=positions)
|
174 | ed01b5b7 | Carles Marti | |
175 | ed01b5b7 | Carles Marti | |
176 | cdc1edbe | Carles Martí | def read_coords_vasp(file, spec_atoms=tuple()): |
177 | cdc1edbe | Carles Martí | """Reads the coordinates from VASP OUTCAR and returns an ase.Atoms object.
|
178 | cdc1edbe | Carles Martí |
|
179 | cdc1edbe | Carles Martí | @param file: The file to read containing the coordinates.
|
180 | cdc1edbe | Carles Martí | @param spec_atoms: List of tuples containing the pairs of chemical symbols.
|
181 | cdc1edbe | Carles Martí | @return: ase.Atoms object of the coordinates in the file.
|
182 | cdc1edbe | Carles Martí | """
|
183 | cdc1edbe | Carles Martí | import os |
184 | cdc1edbe | Carles Martí | import ase.io |
185 | cdc1edbe | Carles Martí | if not os.path.isfile(file): |
186 | cdc1edbe | Carles Martí | err_msg = f"File {file} not found."
|
187 | cdc1edbe | Carles Martí | logger.error(err_msg) |
188 | cdc1edbe | Carles Martí | raise FileNotFoundError(err_msg)
|
189 | cdc1edbe | Carles Martí | if len(spec_atoms) > 0: |
190 | cdc1edbe | Carles Martí | add_special_atoms(spec_atoms) |
191 | cdc1edbe | Carles Martí | return ase.io.read(file) |
192 | cdc1edbe | Carles Martí | |
193 | cdc1edbe | Carles Martí | |
194 | ed01b5b7 | Carles Marti | def read_energy_cp2k(file): |
195 | 1d8c374e | Carles Martí | """Reads the CP2K output file and returns its final energy.
|
196 | ed01b5b7 | Carles Marti |
|
197 | ed01b5b7 | Carles Marti | @param file: The file from which the energy should be read.
|
198 | 1d8c374e | Carles Martí | @return: The last energy on the out file in eV.
|
199 | ed01b5b7 | Carles Marti | """
|
200 | ed01b5b7 | Carles Marti | out_fh = open(file, 'r') |
201 | ed01b5b7 | Carles Marti | energy = None
|
202 | ed01b5b7 | Carles Marti | for line in out_fh: |
203 | ed01b5b7 | Carles Marti | if "ENERGY| Total FORCE_EVAL ( QS ) energy (a.u.):" in line: |
204 | 9371fd91 | Carles Marti | energy = float(line.strip().split(':')[1]) * 27.2113845 # Ha to eV |
205 | ed01b5b7 | Carles Marti | out_fh.close() |
206 | ed01b5b7 | Carles Marti | return energy
|
207 | ed01b5b7 | Carles Marti | |
208 | ed01b5b7 | Carles Marti | |
209 | 1d8c374e | Carles Martí | def collect_confs(dir_list, code, run_type, spec_atms=tuple()): |
210 | 1d8c374e | Carles Martí | """Reads the coordinates and energies of a list of finished calculations.
|
211 | f8c4eafe | Carles |
|
212 | fd2384fc | Carles Marti | Given a dockonsurf directory hierarchy: project/run_type/conf_X
|
213 | 1d8c374e | Carles Martí | (run_type = ['isolated', 'screening' or 'refinement']) it reads the
|
214 | 1d8c374e | Carles Martí | coordinates of each conf_X, it assigns its total energy from the calculation
|
215 | 1d8c374e | Carles Martí | and assigns the conf_X label to track its origin. Finally it returns the
|
216 | 1d8c374e | Carles Martí | ase.Atoms object.
|
217 | 1d8c374e | Carles Martí |
|
218 | 1d8c374e | Carles Martí | @param dir_list: List of directories where to read the coords from.
|
219 | 1d8c374e | Carles Martí | @param code: the code that produced the calculation results files.
|
220 | 1d8c374e | Carles Martí | @param run_type: the type of calculation (and also the name of the folder)
|
221 | 1d8c374e | Carles Martí | containing the calculation subdirectories.
|
222 | 1d8c374e | Carles Martí | @param spec_atms: List of tuples containing pairs of new/traditional
|
223 | 1d8c374e | Carles Martí | chemical symbols.
|
224 | 1d8c374e | Carles Martí | @return: list of ase.Atoms objects.
|
225 | f8c4eafe | Carles | """
|
226 | fd2384fc | Carles Marti | from glob import glob |
227 | 1d8c374e | Carles Martí | import os |
228 | 1d8c374e | Carles Martí | from modules.utilities import is_binary |
229 | 1d8c374e | Carles Martí | atoms_list = [] |
230 | 1d8c374e | Carles Martí | for conf_dir in dir_list: |
231 | 1d8c374e | Carles Martí | conf_path = f"{run_type}/{conf_dir}/"
|
232 | ed01b5b7 | Carles Marti | if code == 'cp2k': |
233 | 1d8c374e | Carles Martí | ase_atms = read_coords_cp2k(glob(f"{conf_path}/*-1.restart")[0], |
234 | 1d8c374e | Carles Martí | spec_atms) |
235 | 1d8c374e | Carles Martí | # Assign energy
|
236 | 1d8c374e | Carles Martí | for fil in os.listdir(conf_path): |
237 | 1d8c374e | Carles Martí | if is_binary(conf_path+fil):
|
238 | 1d8c374e | Carles Martí | continue
|
239 | 1d8c374e | Carles Martí | conf_energy = read_energy_cp2k(conf_path+fil) |
240 | 1d8c374e | Carles Martí | if conf_energy is not None: |
241 | 1d8c374e | Carles Martí | ase_atms.info["energy"] = conf_energy
|
242 | 1d8c374e | Carles Martí | break
|
243 | 1d8c374e | Carles Martí | ase_atms.info[run_type[:3]] = conf_dir
|
244 | 1d8c374e | Carles Martí | atoms_list.append(ase_atms) |
245 | 1d8c374e | Carles Martí | elif code == 'vasp': |
246 | 1d8c374e | Carles Martí | ase_atms = read_coords_vasp(f"{conf_path}/OUTCAR", spec_atms)
|
247 | 1d8c374e | Carles Martí | ase_atms.info["energy"] = ase_atms.get_total_energy() * 27.2113845 |
248 | 1d8c374e | Carles Martí | ase_atms.info[run_type[:3]] = conf_dir
|
249 | 1d8c374e | Carles Martí | atoms_list.append(ase_atms) |
250 | cdc1edbe | Carles Martí | else:
|
251 | 1d8c374e | Carles Martí | err_msg = f"Collect coordinates not implemented for '{code}'."
|
252 | cdc1edbe | Carles Martí | logger.error(err_msg) |
253 | cdc1edbe | Carles Martí | raise NotImplementedError(err_msg) |
254 | 1d8c374e | Carles Martí | return atoms_list
|