dockonsurf / modules / formats.py @ b4aef3d7
Historique | Voir | Annoter | Télécharger (6,22 ko)
1 | b6f47f2d | Carles | """Module for the conversion between atomic coordinates files and objects
|
---|---|---|---|
2 | e23f119b | Carles |
|
3 | e23f119b | Carles | functions:
|
4 | f3004731 | Carles | confs_to_mol_list: Converts the conformers inside a rdkit mol object to a list
|
5 | f3004731 | Carles | of separate mol objects.
|
6 | f3004731 | Carles | rdkit_mol_to_ase_atoms: Converts a rdkit mol object into ase Atoms object.
|
7 | b6f47f2d | Carles | adapt_format: Converts the coordinate files into a required library object type.
|
8 | b6f47f2d | Carles | read_coords: Reads the atomic coordinates resulting from finished calculations.
|
9 | e23f119b | Carles | """
|
10 | e23f119b | Carles | |
11 | e23f119b | Carles | import logging |
12 | f3004731 | Carles | |
13 | f3004731 | Carles | import rdkit.Chem.AllChem as Chem |
14 | f3004731 | Carles | |
15 | 89a980fc | Carles | logger = logging.getLogger('DockOnSurf')
|
16 | e23f119b | Carles | |
17 | e23f119b | Carles | |
def confs_to_mol_list(mol: Chem.rdchem.Mol, idx_lst=None):
    """Splits the conformers of a rdkit mol object into separate mol objects.

    Every selected conformer is written to a mol block and read back
    (without removing hydrogens), producing one new mol object per conformer.

    @param mol: rdkit mol object containing at least one conformer.
    @param idx_lst: iterable of conformer indices to be converted. Each index
        is cast to int before use. If not passed (None), all the conformer
        indices from 0 to mol.GetNumConformers() - 1 are used.
    @return: list of separate mol objects, in the same order as the indices.
    """
    conf_ids = range(mol.GetNumConformers()) if idx_lst is None else idx_lst

    separated_mols = []
    for conf_id in conf_ids:
        # Round-trip through the mol block text format to detach the conformer.
        mol_block = Chem.MolToMolBlock(mol, confId=int(conf_id))
        separated_mols.append(Chem.MolFromMolBlock(mol_block, removeHs=False))
    return separated_mols
31 | f3004731 | Carles | |
32 | f3004731 | Carles | |
def rdkit_mol_to_ase_atoms(mol: Chem.rdchem.Mol):
    """Builds an ase Atoms object out of a rdkit mol object.

    Only the conformer with id 0 is converted. If the mol object holds more
    than one conformer a warning is logged and the remaining ones are ignored.

    @param mol: rdkit mol object, expected to contain only one conformer.
    @return ase.Atoms: ase Atoms object with the element symbols of the mol
        atoms and the positions of its conformer 0.
    """
    from ase import Atoms

    multi_conf_msg = ('A mol object with multiple conformers is parsed, '
                      'converting to Atoms only the first conformer')
    if mol.GetNumConformers() > 1:
        logger.warning(multi_conf_msg)

    return Atoms(
        symbols=[atom.GetSymbol() for atom in mol.GetAtoms()],
        positions=mol.GetConformer(0).GetPositions(),
    )
|
45 | f3004731 | Carles | |
46 | f3004731 | Carles | |
def adapt_format(requirement, coord_file):
    """Converts the coordinate files into a required library object type.

    Depending on the library required to use and the file type, it converts the
    coordinate file into a library-workable object. The file type is detected
    with ase's `filetype` function.

    @param requirement: str, the library for which the conversion should be
        made. Accepted values: 'ase', 'rdkit'.
    @param coord_file: str, path to the coordinates file aiming to convert.
        Accepted file types: 'xyz', 'mol'.
    @return: an object the required library can work with: the result of
        rdkit's Chem.AddHs for 'rdkit', an ase Atoms object for 'ase'.
    @raise NotImplementedError: if `requirement` is not an accepted library or
        the detected file type is not an accepted one. The message is also
        logged as an error.
    """
    import ase.io
    from ase.io.formats import filetype

    req_vals = ['rdkit', 'ase']
    file_type_vals = ['xyz', 'mol']

    # Detect the file format only once: every call to filetype() has to
    # inspect the file again and the answer is reused in all branches below.
    # It is done before validating `requirement`, as the original code did.
    file_type = filetype(coord_file)

    lib_err = f"The conversion to the '{requirement}' library object type" \
              f" has not yet been implemented"
    conv_info = f"Converted {coord_file} to {requirement} object type"
    fil_type_err = f'The {file_type} file format is not supported'

    if requirement not in req_vals:
        logger.error(lib_err)
        raise NotImplementedError(lib_err)

    if file_type not in file_type_vals:
        logger.error(fil_type_err)
        raise NotImplementedError(fil_type_err)

    if requirement == 'rdkit':
        if file_type == 'xyz':
            from modules.xyz2mol import xyz2mol
            # xyz files are read with ase and handed to xyz2mol as plain lists
            # of atomic numbers and coordinates.
            ase_atms = ase.io.read(coord_file)
            atomic_nums = ase_atms.get_atomic_numbers().tolist()
            xyz_coordinates = ase_atms.positions.tolist()
            # TODO Add routine to read charge
            rd_mol_obj = xyz2mol(atomic_nums, xyz_coordinates, charge=0)
            logger.debug(conv_info)
            return Chem.AddHs(rd_mol_obj)
        elif file_type == 'mol':
            logger.debug(conv_info)
            return Chem.AddHs(Chem.MolFromMolFile(coord_file, removeHs=False))

    if requirement == 'ase':
        if file_type == 'xyz':
            logger.debug(conv_info)
            return ase.io.read(coord_file)
        elif file_type == 'mol':
            logger.debug(conv_info)
            # mol files are read with rdkit first and then converted to ase.
            rd_mol = Chem.AddHs(Chem.MolFromMolFile(coord_file, removeHs=False))
            return rdkit_mol_to_ase_atoms(rd_mol)
|
99 | b6f47f2d | Carles | |
100 | b6f47f2d | Carles | |
def read_coords(code, run_type, req):
    """Reads the atomic coordinates resulting from finished calculations.

    Walks over every subdirectory of the run_type directory ('isolated',
    'screening' or 'refinement') and converts each file whose name contains
    the code-specific pattern into an object of the required library by means
    of adapt_format.

    @param code: the code that produced the calculation results files. For
        'cp2k' only files containing '-pos-1.xyz' in their name are read; for
        any other value the pattern is empty, so every file is selected.
    @param run_type: the type of calculation (and also the name of the folder)
        containing the calculation subdirectories.
    @param req: The required library object type to make the list of (eg.
        rdkit, ase)
    @return: list of collection-of-atoms objects. (rdkit.Mol, ase.Atoms, etc.)
    """
    import os

    pattern = '-pos-1.xyz' if code == 'cp2k' else ''

    atoms_objs = []
    for conf in os.listdir(run_type):
        for fil in os.listdir(f"{run_type}/{conf}"):
            if pattern in fil:
                atoms_objs.append(adapt_format(req, f'{run_type}/{conf}/{fil}'))
    return atoms_objs
125 | f8c4eafe | Carles | |
126 | f8c4eafe | Carles | |
def read_energies(code, run_type):
    """Reads the energies resulting from finished calculations.

    Given a run_type ('isolated', 'screening' or 'refinement') directory
    containing different subdirectories with finished calculations in every
    subdirectory, it reads the final energies of calculations inside each
    subdirectory.

    Only the 'cp2k' code is handled: for every file whose name contains
    '-pos-1.xyz' the first line is parsed as the number of atoms, the last
    (number of atoms + 2) lines are retrieved with `tail` and the float that
    follows 'E =' in any of those lines is stored. For any other code an empty
    array is returned.

    @param code: the code that produced the calculation results files.
    @param run_type: the type of calculation (and also the name of the folder)
        containing the calculation subdirectories.
    @return: numpy array with the energies found, in directory listing order.
    """
    import os
    import numpy as np
    from modules.utilities import tail

    energies = []
    if code == 'cp2k':
        pattern = '-pos-1.xyz'
        for conf in os.listdir(run_type):
            for fil in os.listdir(f"{run_type}/{conf}"):
                if pattern not in fil:
                    continue
                # The context manager closes the trajectory file even if
                # parsing fails; it was previously left open for every file.
                with open(f"{run_type}/{conf}/{fil}", 'rb') as traj_fh:
                    num_atoms = int(traj_fh.readline().strip())
                    # num_atoms + 2 lines: presumably the last xyz frame
                    # (count line, comment line and one line per atom).
                    # NOTE(review): tail is assumed to return decoded text
                    # (str), since its lines are searched for a str below.
                    last_geo = tail(traj_fh, num_atoms + 2).splitlines()
                for line in last_geo:
                    if 'E =' in line:
                        energies.append(float(line.split('E =')[1]))

    return np.array(energies)