dockonsurf / modules / isolated.py @ fd33bfdb
Historique | Voir | Annoter | Télécharger (8,69 ko)
1 | 4614bb6a | Carles | """Functions to generate the conformers to be adsorbed and the most stable one.
|
---|---|---|---|
2 | 4614bb6a | Carles |
|
3 | 5c70d6c6 | Carles | functions:
|
4 | 92924d24 | Carles | confs_to_mol_list: Converts the conformers inside a rdkit mol object to a list
|
5 | 92924d24 | Carles | of independent mol objects.
|
6 | 5c70d6c6 | Carles | remove_C_linked_Hs: Removes hydrogens bonded to a carbon atom from a molecule.
|
7 | 92924d24 | Carles | gen_confs: Generate a number of conformers in random orientations.
|
8 | 92924d24 | Carles | get_rmsd: Gets the rmsd matrix of the conformers in a rdkit mol object.
|
9 | 92924d24 | Carles | get_moments_of_inertia: Computes moments of inertia of the given conformers.
|
10 | 92924d24 | Carles | mmff_opt_confs: Optimizes the geometry of the given conformers and returns the
|
11 | 92924d24 | Carles | new mol object and the energies of its conformers.
|
12 | 4614bb6a | Carles | run_isolated: directs the execution of functions to achieve the goal
|
13 | 4614bb6a | Carles | """
|
14 | fd33bfdb | Carles | import os |
15 | 4614bb6a | Carles | import logging |
16 | 4614bb6a | Carles | |
17 | 92924d24 | Carles | import numpy as np |
18 | 92924d24 | Carles | from rdkit.Chem import AllChem as Chem |
19 | 92924d24 | Carles | |
20 | 4614bb6a | Carles | logger = logging.getLogger('DockOnSurf')
|
21 | 4614bb6a | Carles | |
22 | 4614bb6a | Carles | |
23 | 92924d24 | Carles | def confs_to_mol_list(mol: Chem.rdchem.Mol): |
24 | 92924d24 | Carles | """Converts the conformers inside a rdkit mol object to a list of
|
25 | 92924d24 | Carles | separate mol objects.
|
26 | 92924d24 | Carles |
|
27 | 92924d24 | Carles | @param mol: rdkit mol object containing at least one conformer.
|
28 | 92924d24 | Carles | @return list: list of separate mol objects.
|
29 | 92924d24 | Carles | """
|
30 | 92924d24 | Carles | return [Chem.MolFromMolBlock(Chem.MolToMolBlock(mol, confId=conf.GetId()))
|
31 | 92924d24 | Carles | for conf in mol.GetConformers()] |
32 | 92924d24 | Carles | |
33 | 92924d24 | Carles | |
34 | 92924d24 | Carles | def remove_C_linked_Hs(mol: Chem.rdchem.Mol): |
35 | b1f6e69d | Carles | """Removes hydrogen atoms bonded to a carbon atom from a rdkit mol object.
|
36 | 5c70d6c6 | Carles |
|
37 | 92924d24 | Carles | @param mol: rdkit mol object of the molecule with hydrogen atoms.
|
38 | 92924d24 | Carles | @return: rdkit mol object of the molecule without hydrogen atoms linked to
|
39 | 92924d24 | Carles | a carbon atom.
|
40 | 92924d24 | Carles |
|
41 | 92924d24 | Carles | The functions removes the hydrogen atoms bonded to carbon atoms while
|
42 | 92924d24 | Carles | keeping the ones bonded to other atoms or non-bonded at all.
|
43 | 5c70d6c6 | Carles | """
|
44 | 5c70d6c6 | Carles | |
45 | 5c70d6c6 | Carles | mol = Chem.RWMol(mol) |
46 | 5c70d6c6 | Carles | rev_atm_idxs = [atom.GetIdx() for atom in reversed(mol.GetAtoms())] |
47 | 5c70d6c6 | Carles | |
48 | 5c70d6c6 | Carles | for atm_idx in rev_atm_idxs: |
49 | 5c70d6c6 | Carles | atom = mol.GetAtomWithIdx(atm_idx) |
50 | 5c70d6c6 | Carles | if atom.GetAtomicNum() != 1: |
51 | 5c70d6c6 | Carles | continue
|
52 | 5c70d6c6 | Carles | for neigh in atom.GetNeighbors(): |
53 | 5c70d6c6 | Carles | if neigh.GetAtomicNum() == 6: |
54 | 5c70d6c6 | Carles | mol.RemoveAtom(atom.GetIdx()) |
55 | 5c70d6c6 | Carles | return mol
|
56 | 5c70d6c6 | Carles | |
57 | 5c70d6c6 | Carles | |
58 | 92924d24 | Carles | def gen_confs(mol: Chem.rdchem.Mol, num_confs: int, local_min=True): |
59 | 92924d24 | Carles | """Generate conformers in random orientations.
|
60 | 4614bb6a | Carles |
|
61 | 4614bb6a | Carles | @param mol: rdkit mol object of the molecule to be adsorbed.
|
62 | 92924d24 | Carles | @param num_confs: number of conformers to randomly generate.
|
63 | b1f6e69d | Carles | @param local_min: bool: if generated conformers should be a local minimum.
|
64 | 4614bb6a | Carles | @return: mol: rdkit mol object containing the different conformers.
|
65 | 92924d24 | Carles | rmsd_mtx: triangular matrix with the rmsd values of conformers.
|
66 | b1f6e69d | Carles |
|
67 | 92924d24 | Carles | Using the rdkit library, conformers are randomly generated. If structures
|
68 | 92924d24 | Carles | are required to be local minima, ie. setting the 'local_min' value to
|
69 | 92924d24 | Carles | True, a geometry optimisation using UFF is performed.
|
70 | 4614bb6a | Carles | """
|
71 | 92924d24 | Carles | logger.debug('Generating Conformers')
|
72 | 9e83b5a1 | Carles | |
73 | 5c70d6c6 | Carles | conf_ids = Chem.EmbedMultipleConfs(mol, numConfs=num_confs, numThreads=0)
|
74 | b1f6e69d | Carles | if local_min:
|
75 | b1f6e69d | Carles | for conf in conf_ids: |
76 | b1f6e69d | Carles | Chem.UFFOptimizeMolecule(mol, confId=conf) |
77 | 5c70d6c6 | Carles | |
78 | 5c70d6c6 | Carles | Chem.AlignMolConformers(mol) |
79 | 92924d24 | Carles | logger.info(f'Generated {len(mol.GetConformers())} conformers')
|
80 | 92924d24 | Carles | return mol
|
81 | 92924d24 | Carles | |
82 | 62576f52 | Carles | |
83 | 92924d24 | Carles | def get_rmsd(mol: Chem.rdchem.Mol, remove_Hs="c"): |
84 | 92924d24 | Carles | """Computes the rmsd matrix of the conformers in a rdkit mol object.
|
85 | 92924d24 | Carles |
|
86 | 92924d24 | Carles | @param mol: rdkit mol object containing at least two conformers.
|
87 | 92924d24 | Carles | @param remove_Hs: bool or str,
|
88 | 92924d24 | Carles | @return rmsd_matrix: Matrix containing the rmsd values of every pair of
|
89 | 92924d24 | Carles | conformers.
|
90 | 92924d24 | Carles |
|
91 | 92924d24 | Carles | The RMSD values of every pair of conformers is computed, stored in matrix
|
92 | 92924d24 | Carles | form and returned back. The calculation of rmsd values can take into
|
93 | 92924d24 | Carles | account all hydrogens, none, or only the ones not linked to carbon atoms.
|
94 | 92924d24 | Carles | """
|
95 | 92924d24 | Carles | if mol.GetNumConformers() < 2: |
96 | 92924d24 | Carles | err = "The provided molecule has less than 2 conformers"
|
97 | 92924d24 | Carles | logger.error(err) |
98 | 92924d24 | Carles | raise ValueError(err) |
99 | 92924d24 | Carles | |
100 | 92924d24 | Carles | if not remove_Hs: |
101 | 92924d24 | Carles | pass
|
102 | 92924d24 | Carles | elif remove_Hs or remove_Hs.lower() == "all": |
103 | 92924d24 | Carles | mol = Chem.RemoveHs(mol) |
104 | 92924d24 | Carles | elif remove_Hs.lower() == "c": |
105 | 92924d24 | Carles | mol = remove_C_linked_Hs(mol) |
106 | 92924d24 | Carles | else:
|
107 | 54be2a9e | Carles | err = "remove_Hs value does not have an acceptable value"
|
108 | 54be2a9e | Carles | logger.error(err) |
109 | 54be2a9e | Carles | raise ValueError(err) |
110 | 92924d24 | Carles | |
111 | 92924d24 | Carles | num_confs = mol.GetNumConformers() |
112 | 92924d24 | Carles | conf_ids = list(range(num_confs)) |
113 | 92924d24 | Carles | rmsd_mtx = np.zeros((num_confs, num_confs)) |
114 | 5c70d6c6 | Carles | for conf1 in conf_ids: |
115 | 5c70d6c6 | Carles | for conf2 in conf_ids[conf1 + 1:]: |
116 | 92924d24 | Carles | rmsd = Chem.GetBestRMS(mol, mol, prbId=conf2, refId=conf1) |
117 | 92924d24 | Carles | rmsd_mtx[conf1][conf2] = rmsd |
118 | 92924d24 | Carles | rmsd_mtx[conf2][conf1] = rmsd |
119 | 92924d24 | Carles | |
120 | 92924d24 | Carles | return rmsd_mtx
|
121 | 4614bb6a | Carles | |
122 | 92924d24 | Carles | |
123 | 92924d24 | Carles | def get_moments_of_inertia(mol: Chem.rdchem.Mol): |
124 | 92924d24 | Carles | """Computes the moments of inertia of the given conformers
|
125 | 92924d24 | Carles |
|
126 | 92924d24 | Carles | @param mol: rdkit mol object of the relevant molecule.
|
127 | 92924d24 | Carles | @return numpy array 2D: The inner array contains the moments of inertia for
|
128 | 92924d24 | Carles | the three principal axis of a given conformer. They are ordered by its value
|
129 | 92924d24 | Carles | in ascending order. The outer tuple loops over the conformers.
|
130 | 92924d24 | Carles | """
|
131 | 92924d24 | Carles | from rdkit.Chem.Descriptors3D import PMI1, PMI2, PMI3 |
132 | 92924d24 | Carles | |
133 | 92924d24 | Carles | return np.array([[PMI(mol, confId=conf) for PMI in (PMI1, PMI2, PMI3)] |
134 | 92924d24 | Carles | for conf in range(mol.GetNumConformers())]) |
135 | 92924d24 | Carles | |
136 | 92924d24 | Carles | |
137 | 62576f52 | Carles | def mmff_opt_confs(mol: Chem.rdchem.Mol, max_iters=2000): |
138 | 92924d24 | Carles | """Optimizes the geometry of the given conformers and returns the new mol
|
139 | 92924d24 | Carles | object and the energies of its conformers.
|
140 | 92924d24 | Carles |
|
141 | 92924d24 | Carles | @param mol: rdkit mol object of the relevant molecule.
|
142 | 62576f52 | Carles | @param max_iters: Maximum number of geometry optimization iterations. With 0
|
143 | 62576f52 | Carles | a single point energy calculation is performed and only the conformer
|
144 | 62576f52 | Carles | energies are returned.
|
145 | 92924d24 | Carles | @return mol: rdkit mol object of the optimized molecule.
|
146 | 92924d24 | Carles | @return numpy.ndarray: Array with the energies of the optimized conformers.
|
147 | 92924d24 | Carles |
|
148 | 92924d24 | Carles | The MMFF forcefield is used for the geometry optimization in its rdkit
|
149 | 62576f52 | Carles | implementation. With max_iters value set to 0, a single point energy
|
150 | 62576f52 | Carles | calculation is performed and only the energies are returned. For values
|
151 | 62576f52 | Carles | larger than 0, if the geometry does not converge for a certain conformer,
|
152 | 92924d24 | Carles | the latter is removed from the list of conformers and its energy is not
|
153 | 92924d24 | Carles | included in the returned list.
|
154 | 92924d24 | Carles | """
|
155 | 92924d24 | Carles | from rdkit.Chem.rdForceFieldHelpers import MMFFOptimizeMoleculeConfs |
156 | 92924d24 | Carles | |
157 | 92924d24 | Carles | init_num_confs = mol.GetNumConformers() |
158 | 92924d24 | Carles | results = np.array(MMFFOptimizeMoleculeConfs(mol, numThreads=0,
|
159 | 62576f52 | Carles | maxIters=max_iters, |
160 | 62576f52 | Carles | nonBondedThresh=10))
|
161 | 62576f52 | Carles | |
162 | 62576f52 | Carles | # Remove non-converged conformers if optimization is on, ie. maxIters > 0
|
163 | 62576f52 | Carles | # return all conformers if optimization is switched off, ie. maxIters = 0
|
164 | 62576f52 | Carles | if max_iters > 0: |
165 | 62576f52 | Carles | for i, conv in enumerate(results[:, 0]): |
166 | 62576f52 | Carles | if conv != 0: |
167 | 62576f52 | Carles | mol.RemoveConformer(i) |
168 | 62576f52 | Carles | for i, conf in enumerate(mol.GetConformers()): |
169 | 62576f52 | Carles | conf.SetId(i) |
170 | 62576f52 | Carles | if mol.GetNumConformers() < init_num_confs:
|
171 | 62576f52 | Carles | logger.warning(f'MMFF Geometry optimization did not comverge for at'
|
172 | 62576f52 | Carles | f'least one conformer. Continuing with '
|
173 | 62576f52 | Carles | f'{mol.GetNumConformers()} converged conformers')
|
174 | 62576f52 | Carles | logger.info(f'Optimized conformers with MMFF.')
|
175 | 62576f52 | Carles | return mol, np.array([res[1] for res in results if res[0] == 0]) |
176 | 62576f52 | Carles | else:
|
177 | 62576f52 | Carles | logger.info(f'Computed conformers energy with MMFF.')
|
178 | 62576f52 | Carles | return np.array([res[1] for res in results]) |
179 | 4614bb6a | Carles | |
180 | 4614bb6a | Carles | |
181 | fd33bfdb | Carles | def prep_run(inp_file, mol: Chem.rdchem.Mol, exemplars): |
182 | fd33bfdb | Carles | from shutil import copy |
183 | fd33bfdb | Carles | # Checking if 'isolated' directory already exists and if so backing it up.
|
184 | fd33bfdb | Carles | dir_name = 'isolated'
|
185 | fd33bfdb | Carles | bak_num = 0
|
186 | fd33bfdb | Carles | while dir_name in os.listdir("."): |
187 | fd33bfdb | Carles | bak_num += 1
|
188 | fd33bfdb | Carles | dir_name = dir_name.split(".")[0]+f".bak{bak_num}" |
189 | fd33bfdb | Carles | if bak_num > 0: |
190 | fd33bfdb | Carles | os.rename('isolated', dir_name)
|
191 | fd33bfdb | Carles | logger.warning("'isolated' directory already present. Moved former "
|
192 | fd33bfdb | Carles | f"directory to {dir_name}")
|
193 | fd33bfdb | Carles | os.mkdir('isolated')
|
194 | fd33bfdb | Carles | |
195 | fd33bfdb | Carles | # Creating
|
196 | fd33bfdb | Carles | for i, conf in enumerate(exemplars): |
197 | fd33bfdb | Carles | os.mkdir(f'isolated/conf_{i}')
|
198 | fd33bfdb | Carles | copy(inp_file, f'isolated/conf_{i}/')
|
199 | fd33bfdb | Carles | Chem.MolToXYZFile(mol, f'isolated/conf_{i}/coord.xyz', confId=conf)
|
200 | fd33bfdb | Carles | |
201 | fd33bfdb | Carles | |
202 | 4614bb6a | Carles | def run_isolated(inp_vars): |
203 | 4614bb6a | Carles | """Directs the execution of functions to obtain the conformers to adsorb
|
204 | 4614bb6a | Carles |
|
205 | 4614bb6a | Carles | @param inp_vars:
|
206 | 4614bb6a | Carles | @return:
|
207 | 4614bb6a | Carles | """
|
208 | 54be2a9e | Carles | from formats import adapt_format |
209 | 75325cc3 | Carles | from clustering import clustering |
210 | 54be2a9e | Carles | |
211 | 34a03ee1 | Carles | # from clustering import *
|
212 | 4614bb6a | Carles | logger.info('Carrying out procedures for the isolated molecule')
|
213 | 4614bb6a | Carles | rd_mol = adapt_format('rdkit', inp_vars['molec_file']) |
214 | 92924d24 | Carles | confs = gen_confs(rd_mol, inp_vars['num_conformers'])
|
215 | fd33bfdb | Carles | if inp_vars['min_confs']: |
216 | fd33bfdb | Carles | confs, confs_ener = mmff_opt_confs(confs) |
217 | 34a03ee1 | Carles | rmsd_mtx = get_rmsd(confs) |
218 | fd33bfdb | Carles | exemplars = clustering(rmsd_mtx) |
219 | fd33bfdb | Carles | prep_run(inp_vars['isol_inp_file'], confs, exemplars)
|
220 | fd33bfdb | Carles | # run dft
|
221 | 92924d24 | Carles | |
222 | 34a03ee1 | Carles | if 'moi' in inp_vars['cluster_magns']: |
223 | 34a03ee1 | Carles | confs_moi = get_moments_of_inertia(confs) |
224 | 92924d24 | Carles | |
225 | fd33bfdb | Carles | # if 'energy' in inp_vars['cluster_magns']: |