dockonsurf / modules / isolated.py @ 54be2a9e
Historique | Voir | Annoter | Télécharger (7,9 ko)
1 |
"""Functions to generate the conformers to be adsorbed and the most stable one.
|
---|---|
2 |
|
3 |
functions:
|
4 |
confs_to_mol_list: Converts the conformers inside a rdkit mol object to a list
|
5 |
of independent mol objects.
|
6 |
remove_C_linked_Hs: Removes hydrogens bonded to a carbon atom from a molecule.
|
7 |
gen_confs: Generate a number of conformers in random orientations.
|
8 |
get_rmsd: Gets the rmsd matrix of the conformers in a rdkit mol object.
|
9 |
get_moments_of_inertia: Computes moments of inertia of the given conformers.
|
10 |
mmff_opt_confs: Optimizes the geometry of the given conformers and returns the
|
11 |
new mol object and the energies of its conformers.
|
12 |
run_isolated: directs the execution of functions to achieve the goal
|
13 |
"""
|
14 |
import logging |
15 |
|
16 |
import numpy as np |
17 |
from rdkit.Chem import AllChem as Chem |
18 |
|
19 |
logger = logging.getLogger('DockOnSurf')
|
20 |
|
21 |
|
22 |
def confs_to_mol_list(mol: Chem.rdchem.Mol): |
23 |
"""Converts the conformers inside a rdkit mol object to a list of
|
24 |
separate mol objects.
|
25 |
|
26 |
@param mol: rdkit mol object containing at least one conformer.
|
27 |
@return list: list of separate mol objects.
|
28 |
"""
|
29 |
return [Chem.MolFromMolBlock(Chem.MolToMolBlock(mol, confId=conf.GetId()))
|
30 |
for conf in mol.GetConformers()] |
31 |
|
32 |
|
33 |
def remove_C_linked_Hs(mol: Chem.rdchem.Mol): |
34 |
"""Removes hydrogen atoms bonded to a carbon atom from a rdkit mol object.
|
35 |
|
36 |
@param mol: rdkit mol object of the molecule with hydrogen atoms.
|
37 |
@return: rdkit mol object of the molecule without hydrogen atoms linked to
|
38 |
a carbon atom.
|
39 |
|
40 |
The functions removes the hydrogen atoms bonded to carbon atoms while
|
41 |
keeping the ones bonded to other atoms or non-bonded at all.
|
42 |
"""
|
43 |
|
44 |
mol = Chem.RWMol(mol) |
45 |
rev_atm_idxs = [atom.GetIdx() for atom in reversed(mol.GetAtoms())] |
46 |
|
47 |
for atm_idx in rev_atm_idxs: |
48 |
atom = mol.GetAtomWithIdx(atm_idx) |
49 |
if atom.GetAtomicNum() != 1: |
50 |
continue
|
51 |
for neigh in atom.GetNeighbors(): |
52 |
if neigh.GetAtomicNum() == 6: |
53 |
mol.RemoveAtom(atom.GetIdx()) |
54 |
return mol
|
55 |
|
56 |
|
57 |
def gen_confs(mol: Chem.rdchem.Mol, num_confs: int, local_min=True): |
58 |
"""Generate conformers in random orientations.
|
59 |
|
60 |
@param mol: rdkit mol object of the molecule to be adsorbed.
|
61 |
@param num_confs: number of conformers to randomly generate.
|
62 |
@param local_min: bool: if generated conformers should be a local minimum.
|
63 |
@return: mol: rdkit mol object containing the different conformers.
|
64 |
rmsd_mtx: triangular matrix with the rmsd values of conformers.
|
65 |
|
66 |
Using the rdkit library, conformers are randomly generated. If structures
|
67 |
are required to be local minima, ie. setting the 'local_min' value to
|
68 |
True, a geometry optimisation using UFF is performed.
|
69 |
"""
|
70 |
logger.debug('Generating Conformers')
|
71 |
|
72 |
conf_ids = Chem.EmbedMultipleConfs(mol, numConfs=num_confs, numThreads=0)
|
73 |
if local_min:
|
74 |
for conf in conf_ids: |
75 |
Chem.UFFOptimizeMolecule(mol, confId=conf) |
76 |
|
77 |
Chem.AlignMolConformers(mol) |
78 |
logger.info(f'Generated {len(mol.GetConformers())} conformers')
|
79 |
return mol
|
80 |
|
81 |
|
82 |
def get_rmsd(mol: Chem.rdchem.Mol, remove_Hs="c"): |
83 |
"""Computes the rmsd matrix of the conformers in a rdkit mol object.
|
84 |
|
85 |
@param mol: rdkit mol object containing at least two conformers.
|
86 |
@param remove_Hs: bool or str,
|
87 |
@return rmsd_matrix: Matrix containing the rmsd values of every pair of
|
88 |
conformers.
|
89 |
|
90 |
The RMSD values of every pair of conformers is computed, stored in matrix
|
91 |
form and returned back. The calculation of rmsd values can take into
|
92 |
account all hydrogens, none, or only the ones not linked to carbon atoms.
|
93 |
"""
|
94 |
if mol.GetNumConformers() < 2: |
95 |
err = "The provided molecule has less than 2 conformers"
|
96 |
logger.error(err) |
97 |
raise ValueError(err) |
98 |
|
99 |
if not remove_Hs: |
100 |
pass
|
101 |
elif remove_Hs or remove_Hs.lower() == "all": |
102 |
mol = Chem.RemoveHs(mol) |
103 |
elif remove_Hs.lower() == "c": |
104 |
mol = remove_C_linked_Hs(mol) |
105 |
else:
|
106 |
err = "remove_Hs value does not have an acceptable value"
|
107 |
logger.error(err) |
108 |
raise ValueError(err) |
109 |
|
110 |
num_confs = mol.GetNumConformers() |
111 |
conf_ids = list(range(num_confs)) |
112 |
rmsd_mtx = np.zeros((num_confs, num_confs)) |
113 |
for conf1 in conf_ids: |
114 |
for conf2 in conf_ids[conf1 + 1:]: |
115 |
rmsd = Chem.GetBestRMS(mol, mol, prbId=conf2, refId=conf1) |
116 |
rmsd_mtx[conf1][conf2] = rmsd |
117 |
rmsd_mtx[conf2][conf1] = rmsd |
118 |
|
119 |
return rmsd_mtx
|
120 |
|
121 |
|
122 |
def get_moments_of_inertia(mol: Chem.rdchem.Mol): |
123 |
"""Computes the moments of inertia of the given conformers
|
124 |
|
125 |
@param mol: rdkit mol object of the relevant molecule.
|
126 |
@return numpy array 2D: The inner array contains the moments of inertia for
|
127 |
the three principal axis of a given conformer. They are ordered by its value
|
128 |
in ascending order. The outer tuple loops over the conformers.
|
129 |
"""
|
130 |
from rdkit.Chem.Descriptors3D import PMI1, PMI2, PMI3 |
131 |
|
132 |
return np.array([[PMI(mol, confId=conf) for PMI in (PMI1, PMI2, PMI3)] |
133 |
for conf in range(mol.GetNumConformers())]) |
134 |
|
135 |
|
136 |
def mmff_opt_confs(mol: Chem.rdchem.Mol, max_iters=2000): |
137 |
"""Optimizes the geometry of the given conformers and returns the new mol
|
138 |
object and the energies of its conformers.
|
139 |
|
140 |
@param mol: rdkit mol object of the relevant molecule.
|
141 |
@param max_iters: Maximum number of geometry optimization iterations. With 0
|
142 |
a single point energy calculation is performed and only the conformer
|
143 |
energies are returned.
|
144 |
@return mol: rdkit mol object of the optimized molecule.
|
145 |
@return numpy.ndarray: Array with the energies of the optimized conformers.
|
146 |
|
147 |
The MMFF forcefield is used for the geometry optimization in its rdkit
|
148 |
implementation. With max_iters value set to 0, a single point energy
|
149 |
calculation is performed and only the energies are returned. For values
|
150 |
larger than 0, if the geometry does not converge for a certain conformer,
|
151 |
the latter is removed from the list of conformers and its energy is not
|
152 |
included in the returned list.
|
153 |
"""
|
154 |
from rdkit.Chem.rdForceFieldHelpers import MMFFOptimizeMoleculeConfs |
155 |
|
156 |
init_num_confs = mol.GetNumConformers() |
157 |
results = np.array(MMFFOptimizeMoleculeConfs(mol, numThreads=0,
|
158 |
maxIters=max_iters, |
159 |
nonBondedThresh=10))
|
160 |
|
161 |
# Remove non-converged conformers if optimization is on, ie. maxIters > 0
|
162 |
# return all conformers if optimization is switched off, ie. maxIters = 0
|
163 |
if max_iters > 0: |
164 |
for i, conv in enumerate(results[:, 0]): |
165 |
if conv != 0: |
166 |
mol.RemoveConformer(i) |
167 |
for i, conf in enumerate(mol.GetConformers()): |
168 |
conf.SetId(i) |
169 |
if mol.GetNumConformers() < init_num_confs:
|
170 |
logger.warning(f'MMFF Geometry optimization did not comverge for at'
|
171 |
f'least one conformer. Continuing with '
|
172 |
f'{mol.GetNumConformers()} converged conformers')
|
173 |
logger.info(f'Optimized conformers with MMFF.')
|
174 |
return mol, np.array([res[1] for res in results if res[0] == 0]) |
175 |
else:
|
176 |
logger.info(f'Computed conformers energy with MMFF.')
|
177 |
return np.array([res[1] for res in results]) |
178 |
|
179 |
|
180 |
def run_isolated(inp_vars): |
181 |
"""Directs the execution of functions to obtain the conformers to adsorb
|
182 |
|
183 |
@param inp_vars:
|
184 |
@return:
|
185 |
"""
|
186 |
from formats import adapt_format |
187 |
|
188 |
# from clustering import *
|
189 |
logger.info('Carrying out procedures for the isolated molecule')
|
190 |
rd_mol = adapt_format('rdkit', inp_vars['molec_file']) |
191 |
confs = gen_confs(rd_mol, inp_vars['num_conformers'])
|
192 |
rmsd_mtx = get_rmsd(confs) |
193 |
|
194 |
if 'moi' in inp_vars['cluster_magns']: |
195 |
confs_moi = get_moments_of_inertia(confs) |
196 |
|
197 |
if 'energy' in inp_vars['cluster_magns']: |
198 |
if inp_vars['min_confs']: |
199 |
confs, confs_eners = mmff_opt_confs(confs) |
200 |
else:
|
201 |
confs_eners = mmff_opt_confs(confs, max_iters=0)
|
202 |
|
203 |
# clustering2()
|