Révision 92924d24 modules/isolated.py
b/modules/isolated.py | ||
---|---|---|
1 | 1 |
"""Functions to generate the conformers to be adsorbed and the most stable one. |
2 | 2 |
|
3 | 3 |
functions: |
4 |
confs_to_mol_list: Converts the conformers inside a rdkit mol object to a list |
|
5 |
of independent mol objects. |
|
4 | 6 |
remove_C_linked_Hs: Removes hydrogens bonded to a carbon atom from a molecule. |
5 |
gen_confs: Generate a number of different conformers in random orientations. |
|
7 |
gen_confs: Generate a number of conformers in random orientations. |
|
8 |
get_rmsd: Gets the rmsd matrix of the conformers in a rdkit mol object. |
|
9 |
get_moments_of_inertia: Computes moments of inertia of the given conformers. |
|
10 |
mmff_opt_confs: Optimizes the geometry of the given conformers and returns the |
|
11 |
new mol object and the energies of its conformers. |
|
6 | 12 |
run_isolated: directs the execution of functions to achieve the goal |
7 | 13 |
""" |
8 | 14 |
import logging |
9 | 15 |
|
16 |
import numpy as np |
|
17 |
from rdkit.Chem import AllChem as Chem |
|
18 |
|
|
10 | 19 |
from formats import adapt_format |
11 | 20 |
|
12 | 21 |
logger = logging.getLogger('DockOnSurf') |
13 | 22 |
|
14 | 23 |
|
15 |
def remove_C_linked_Hs(mol): |
|
24 |
def confs_to_mol_list(mol: Chem.rdchem.Mol): |
|
25 |
"""Converts the conformers inside a rdkit mol object to a list of |
|
26 |
separate mol objects. |
|
27 |
|
|
28 |
@param mol: rdkit mol object containing at least one conformer. |
|
29 |
@return list: list of separate mol objects. |
|
30 |
""" |
|
31 |
return [Chem.MolFromMolBlock(Chem.MolToMolBlock(mol, confId=conf.GetId())) |
|
32 |
for conf in mol.GetConformers()] |
|
33 |
|
|
34 |
|
|
35 |
def remove_C_linked_Hs(mol: Chem.rdchem.Mol): |
|
16 | 36 |
"""Removes hydrogen atoms bonded to a carbon atom from a rdkit mol object. |
17 | 37 |
|
18 |
@param mol: rdkit mol object of the molecule with hydrogens. |
|
19 |
@return: rdkit mol object of the molecule without hydrogens linked to carbon |
|
20 |
atoms. |
|
21 |
The functions removes the hydrogens bonded to C atoms while keeping the |
|
22 |
ones bonded to heteroatoms. |
|
38 |
@param mol: rdkit mol object of the molecule with hydrogen atoms. |
|
39 |
@return: rdkit mol object of the molecule without hydrogen atoms linked to |
|
40 |
a carbon atom. |
|
41 |
|
|
42 |
The functions removes the hydrogen atoms bonded to carbon atoms while |
|
43 |
keeping the ones bonded to other atoms or non-bonded at all. |
|
23 | 44 |
""" |
24 |
from rdkit import Chem |
|
25 | 45 |
|
26 | 46 |
mol = Chem.RWMol(mol) |
27 | 47 |
rev_atm_idxs = [atom.GetIdx() for atom in reversed(mol.GetAtoms())] |
... | ... | |
36 | 56 |
return mol |
37 | 57 |
|
38 | 58 |
|
39 |
def gen_confs(mol, num_confs, rmsd_thld, local_min=True):
|
|
40 |
"""Generate a number of different conformers in random orientations.
|
|
59 |
def gen_confs(mol: Chem.rdchem.Mol, num_confs: int, local_min=True):
|
|
60 |
"""Generate conformers in random orientations. |
|
41 | 61 |
|
42 | 62 |
@param mol: rdkit mol object of the molecule to be adsorbed. |
43 |
@param num_confs: number of raw conformers to randomly generate. |
|
44 |
@param rmsd_thld: rmsd threshold to consider two conformers as different. |
|
63 |
@param num_confs: number of conformers to randomly generate. |
|
45 | 64 |
@param local_min: bool: if generated conformers should be a local minimum. |
46 | 65 |
@return: mol: rdkit mol object containing the different conformers. |
66 |
rmsd_mtx: triangular matrix with the rmsd values of conformers. |
|
47 | 67 |
|
48 |
Using the rdkit library, conformers are randomly generated. |
|
49 |
If structures are required to be local minima, ie. setting the 'local_min' |
|
50 |
value to True, a geometry optimisation using UFF is performed. |
|
51 |
Identical conformers are removed when, by evaluating the RMSD of |
|
52 |
two structures without carbon-bonded hydrogens, their value lies below the |
|
53 |
'rmsd_thld'. |
|
54 |
The resulting conformers are returned back formatted as rdkit mol object. |
|
68 |
Using the rdkit library, conformers are randomly generated. If structures |
|
69 |
are required to be local minima, ie. setting the 'local_min' value to |
|
70 |
True, a geometry optimisation using UFF is performed. |
|
55 | 71 |
""" |
56 |
from rdkit.Chem import AllChem as Chem |
|
57 |
logger.info('Generating Conformers') |
|
72 |
logger.debug('Generating Conformers') |
|
58 | 73 |
|
59 | 74 |
conf_ids = Chem.EmbedMultipleConfs(mol, numConfs=num_confs, numThreads=0) |
60 | 75 |
if local_min: |
... | ... | |
62 | 77 |
Chem.UFFOptimizeMolecule(mol, confId=conf) |
63 | 78 |
|
64 | 79 |
Chem.AlignMolConformers(mol) |
65 |
mol_wo_Hs = remove_C_linked_Hs(mol) |
|
80 |
logger.info(f'Generated {len(mol.GetConformers())} conformers') |
|
81 |
return mol |
|
82 |
|
|
83 |
|
|
84 |
def get_rmsd(mol: Chem.rdchem.Mol, remove_Hs="c"): |
|
85 |
"""Computes the rmsd matrix of the conformers in a rdkit mol object. |
|
86 |
|
|
87 |
@param mol: rdkit mol object containing at least two conformers. |
|
88 |
@param remove_Hs: bool or str, |
|
89 |
@return rmsd_matrix: Matrix containing the rmsd values of every pair of |
|
90 |
conformers. |
|
91 |
|
|
92 |
The RMSD values of every pair of conformers is computed, stored in matrix |
|
93 |
form and returned back. The calculation of rmsd values can take into |
|
94 |
account all hydrogens, none, or only the ones not linked to carbon atoms. |
|
95 |
""" |
|
96 |
if mol.GetNumConformers() < 2: |
|
97 |
err = "The provided molecule has less than 2 conformers" |
|
98 |
logger.error(err) |
|
99 |
raise ValueError(err) |
|
100 |
|
|
101 |
if not remove_Hs: |
|
102 |
pass |
|
103 |
elif remove_Hs or remove_Hs.lower() == "all": |
|
104 |
mol = Chem.RemoveHs(mol) |
|
105 |
elif remove_Hs.lower() == "c": |
|
106 |
mol = remove_C_linked_Hs(mol) |
|
107 |
else: |
|
108 |
pass |
|
109 |
|
|
110 |
num_confs = mol.GetNumConformers() |
|
111 |
conf_ids = list(range(num_confs)) |
|
112 |
rmsd_mtx = np.zeros((num_confs, num_confs)) |
|
66 | 113 |
for conf1 in conf_ids: |
67 | 114 |
for conf2 in conf_ids[conf1 + 1:]: |
68 |
try: |
|
69 |
rmsd = Chem.GetBestRMS(mol_wo_Hs, mol_wo_Hs, prbId=conf2, |
|
70 |
refId=conf1) |
|
71 |
except ValueError: |
|
72 |
continue |
|
73 |
else: |
|
74 |
if rmsd <= rmsd_thld: |
|
75 |
mol.RemoveConformer(conf2) |
|
115 |
rmsd = Chem.GetBestRMS(mol, mol, prbId=conf2, refId=conf1) |
|
116 |
rmsd_mtx[conf1][conf2] = rmsd |
|
117 |
rmsd_mtx[conf2][conf1] = rmsd |
|
118 |
|
|
119 |
return rmsd_mtx |
|
76 | 120 |
|
121 |
|
|
122 |
def get_moments_of_inertia(mol: Chem.rdchem.Mol): |
|
123 |
"""Computes the moments of inertia of the given conformers |
|
124 |
|
|
125 |
@param mol: rdkit mol object of the relevant molecule. |
|
126 |
@return numpy array 2D: The inner array contains the moments of inertia for |
|
127 |
the three principal axis of a given conformer. They are ordered by its value |
|
128 |
in ascending order. The outer tuple loops over the conformers. |
|
129 |
""" |
|
130 |
from rdkit.Chem.Descriptors3D import PMI1, PMI2, PMI3 |
|
131 |
|
|
132 |
return np.array([[PMI(mol, confId=conf) for PMI in (PMI1, PMI2, PMI3)] |
|
133 |
for conf in range(mol.GetNumConformers())]) |
|
134 |
|
|
135 |
|
|
136 |
def mmff_opt_confs(mol: Chem.rdchem.Mol): |
|
137 |
"""Optimizes the geometry of the given conformers and returns the new mol |
|
138 |
object and the energies of its conformers. |
|
139 |
|
|
140 |
@param mol: rdkit mol object of the relevant molecule. |
|
141 |
@return mol: rdkit mol object of the optimized molecule. |
|
142 |
@return numpy.ndarray: Array with the energies of the optimized conformers. |
|
143 |
|
|
144 |
The MMFF forcefield is used for the geometry optimization in its rdkit |
|
145 |
implementation. If the geometry does not converge for a certain conformer, |
|
146 |
the latter is removed from the list of conformers and its energy is not |
|
147 |
included in the returned list. |
|
148 |
""" |
|
149 |
from rdkit.Chem.rdForceFieldHelpers import MMFFOptimizeMoleculeConfs |
|
150 |
|
|
151 |
init_num_confs = mol.GetNumConformers() |
|
152 |
results = np.array(MMFFOptimizeMoleculeConfs(mol, numThreads=0, |
|
153 |
maxIters=2000, |
|
154 |
nonBondedThresh=10)) |
|
155 |
for i, conv in enumerate(results[:,0]): |
|
156 |
if conv != 0: |
|
157 |
mol.RemoveConformer(i) |
|
77 | 158 |
for i, conf in enumerate(mol.GetConformers()): |
78 | 159 |
conf.SetId(i) |
160 |
if mol.GetNumConformers() < init_num_confs: |
|
161 |
logger.warning(f'MMFF Geometry optimization did not comverge for at ' |
|
162 |
f'least one conformer. Continuing with ' |
|
163 |
f'{mol.GetNumConformers()} converged conformers') |
|
79 | 164 |
|
80 |
logger.info(f'Generated {len(mol.GetConformers())} different conformers') |
|
81 |
return mol |
|
165 |
return mol, np.array([res[1] for res in results if res[0] == 0]) |
|
82 | 166 |
|
83 | 167 |
|
84 | 168 |
def run_isolated(inp_vars): |
... | ... | |
89 | 173 |
""" |
90 | 174 |
logger.info('Carrying out procedures for the isolated molecule') |
91 | 175 |
rd_mol = adapt_format('rdkit', inp_vars['molec_file']) |
92 |
confs = gen_confs(rd_mol, inp_vars['num_conformers'], inp_vars['iso_rmsd']) |
|
176 |
confs = gen_confs(rd_mol, inp_vars['num_conformers']) |
|
177 |
|
|
178 |
|
|
93 | 179 |
|
Formats disponibles : Unified diff