Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / isolated.py @ 54be2a9e

Historique | Voir | Annoter | Télécharger (7,9 ko)

1
"""Functions to generate the conformers to be adsorbed and the most stable one.
2

3
functions:
4
confs_to_mol_list: Converts the conformers inside a rdkit mol object to a list
5
    of independent mol objects.
6
remove_C_linked_Hs: Removes hydrogens bonded to a carbon atom from a molecule.
7
gen_confs: Generate a number of conformers in random orientations.
8
get_rmsd: Gets the rmsd matrix of the conformers in a rdkit mol object.
9
get_moments_of_inertia: Computes moments of inertia of the given conformers.
10
mmff_opt_confs: Optimizes the geometry of the given conformers and returns the
11
    new mol object and the energies of its conformers.
12
run_isolated: directs the execution of functions to achieve the goal
13
"""
14
import logging
15

    
16
import numpy as np
17
from rdkit.Chem import AllChem as Chem
18

    
19
logger = logging.getLogger('DockOnSurf')
20

    
21

    
22
def confs_to_mol_list(mol: Chem.rdchem.Mol):
23
    """Converts the conformers inside a rdkit mol object to a list of
24
    separate mol objects.
25

26
    @param mol: rdkit mol object containing at least one conformer.
27
    @return list: list of separate mol objects.
28
    """
29
    return [Chem.MolFromMolBlock(Chem.MolToMolBlock(mol, confId=conf.GetId()))
30
            for conf in mol.GetConformers()]
31

    
32

    
33
def remove_C_linked_Hs(mol: Chem.rdchem.Mol):
34
    """Removes hydrogen atoms bonded to a carbon atom from a rdkit mol object.
35

36
    @param mol: rdkit mol object of the molecule with hydrogen atoms.
37
    @return: rdkit mol object of the molecule without hydrogen atoms linked to
38
    a carbon atom.
39

40
    The functions removes the hydrogen atoms bonded to carbon atoms while
41
    keeping the ones bonded to other atoms or non-bonded at all.
42
    """
43

    
44
    mol = Chem.RWMol(mol)
45
    rev_atm_idxs = [atom.GetIdx() for atom in reversed(mol.GetAtoms())]
46

    
47
    for atm_idx in rev_atm_idxs:
48
        atom = mol.GetAtomWithIdx(atm_idx)
49
        if atom.GetAtomicNum() != 1:
50
            continue
51
        for neigh in atom.GetNeighbors():
52
            if neigh.GetAtomicNum() == 6:
53
                mol.RemoveAtom(atom.GetIdx())
54
    return mol
55

    
56

    
57
def gen_confs(mol: Chem.rdchem.Mol, num_confs: int, local_min=True):
58
    """Generate conformers in random orientations.
59

60
    @param mol: rdkit mol object of the molecule to be adsorbed.
61
    @param num_confs: number of conformers to randomly generate.
62
    @param local_min: bool: if generated conformers should be a local minimum.
63
    @return: mol: rdkit mol object containing the different conformers.
64
             rmsd_mtx: triangular matrix with the rmsd values of conformers.
65

66
    Using the rdkit library, conformers are randomly generated. If structures 
67
    are required to be local minima, ie. setting the 'local_min' value to 
68
    True, a geometry optimisation using UFF is performed.
69
    """
70
    logger.debug('Generating Conformers')
71

    
72
    conf_ids = Chem.EmbedMultipleConfs(mol, numConfs=num_confs, numThreads=0)
73
    if local_min:
74
        for conf in conf_ids:
75
            Chem.UFFOptimizeMolecule(mol, confId=conf)
76

    
77
    Chem.AlignMolConformers(mol)
78
    logger.info(f'Generated {len(mol.GetConformers())} conformers')
79
    return mol
80

    
81

    
82
def get_rmsd(mol: Chem.rdchem.Mol, remove_Hs="c"):
83
    """Computes the rmsd matrix of the conformers in a rdkit mol object.
84

85
    @param mol: rdkit mol object containing at least two conformers.
86
    @param remove_Hs: bool or str,
87
    @return rmsd_matrix: Matrix containing the rmsd values of every pair of
88
    conformers.
89

90
    The RMSD values of every pair of conformers is computed, stored in matrix
91
    form and returned back. The calculation of rmsd values can take into
92
    account all hydrogens, none, or only the ones not linked to carbon atoms.
93
    """
94
    if mol.GetNumConformers() < 2:
95
        err = "The provided molecule has less than 2 conformers"
96
        logger.error(err)
97
        raise ValueError(err)
98

    
99
    if not remove_Hs:
100
        pass
101
    elif remove_Hs or remove_Hs.lower() == "all":
102
        mol = Chem.RemoveHs(mol)
103
    elif remove_Hs.lower() == "c":
104
        mol = remove_C_linked_Hs(mol)
105
    else:
106
        err = "remove_Hs value does not have an acceptable value"
107
        logger.error(err)
108
        raise ValueError(err)
109

    
110
    num_confs = mol.GetNumConformers()
111
    conf_ids = list(range(num_confs))
112
    rmsd_mtx = np.zeros((num_confs, num_confs))
113
    for conf1 in conf_ids:
114
        for conf2 in conf_ids[conf1 + 1:]:
115
            rmsd = Chem.GetBestRMS(mol, mol, prbId=conf2, refId=conf1)
116
            rmsd_mtx[conf1][conf2] = rmsd
117
            rmsd_mtx[conf2][conf1] = rmsd
118

    
119
    return rmsd_mtx
120

    
121

    
122
def get_moments_of_inertia(mol: Chem.rdchem.Mol):
123
    """Computes the moments of inertia of the given conformers
124

125
    @param mol: rdkit mol object of the relevant molecule.
126
    @return numpy array 2D: The inner array contains the moments of inertia for
127
    the three principal axis of a given conformer. They are ordered by its value
128
    in ascending order. The outer tuple loops over the conformers.
129
    """
130
    from rdkit.Chem.Descriptors3D import PMI1, PMI2, PMI3
131

    
132
    return np.array([[PMI(mol, confId=conf) for PMI in (PMI1, PMI2, PMI3)]
133
                     for conf in range(mol.GetNumConformers())])
134

    
135

    
136
def mmff_opt_confs(mol: Chem.rdchem.Mol, max_iters=2000):
137
    """Optimizes the geometry of the given conformers and returns the new mol
138
    object and the energies of its conformers.
139

140
    @param mol: rdkit mol object of the relevant molecule.
141
    @param max_iters: Maximum number of geometry optimization iterations. With 0
142
    a single point energy calculation is performed and only the conformer
143
    energies are returned.
144
    @return mol: rdkit mol object of the optimized molecule.
145
    @return numpy.ndarray: Array with the energies of the optimized conformers.
146

147
    The MMFF forcefield is used for the geometry optimization in its rdkit
148
    implementation. With max_iters value set to 0, a single point energy
149
    calculation is performed and only the energies are returned. For values
150
    larger than 0, if the geometry does not converge for a certain conformer,
151
    the latter is removed from the list of conformers and its energy is not
152
    included in the returned list.
153
    """
154
    from rdkit.Chem.rdForceFieldHelpers import MMFFOptimizeMoleculeConfs
155

    
156
    init_num_confs = mol.GetNumConformers()
157
    results = np.array(MMFFOptimizeMoleculeConfs(mol, numThreads=0,
158
                                                 maxIters=max_iters,
159
                                                 nonBondedThresh=10))
160

    
161
    # Remove non-converged conformers if optimization is on, ie. maxIters > 0
162
    # return all conformers if optimization is switched off, ie. maxIters = 0
163
    if max_iters > 0:
164
        for i, conv in enumerate(results[:, 0]):
165
            if conv != 0:
166
                mol.RemoveConformer(i)
167
        for i, conf in enumerate(mol.GetConformers()):
168
            conf.SetId(i)
169
        if mol.GetNumConformers() < init_num_confs:
170
            logger.warning(f'MMFF Geometry optimization did not comverge for at'
171
                           f'least one conformer. Continuing with '
172
                           f'{mol.GetNumConformers()} converged conformers')
173
        logger.info(f'Optimized conformers with MMFF.')
174
        return mol, np.array([res[1] for res in results if res[0] == 0])
175
    else:
176
        logger.info(f'Computed conformers energy with MMFF.')
177
        return np.array([res[1] for res in results])
178

    
179

    
180
def run_isolated(inp_vars):
181
    """Directs the execution of functions to obtain the conformers to adsorb
182

183
    @param inp_vars:
184
    @return:
185
    """
186
    from formats import adapt_format
187

    
188
    # from clustering import *
189
    logger.info('Carrying out procedures for the isolated molecule')
190
    rd_mol = adapt_format('rdkit', inp_vars['molec_file'])
191
    confs = gen_confs(rd_mol, inp_vars['num_conformers'])
192
    rmsd_mtx = get_rmsd(confs)
193

    
194
    if 'moi' in inp_vars['cluster_magns']:
195
        confs_moi = get_moments_of_inertia(confs)
196

    
197
    if 'energy' in inp_vars['cluster_magns']:
198
        if inp_vars['min_confs']:
199
            confs, confs_eners = mmff_opt_confs(confs)
200
        else:
201
            confs_eners = mmff_opt_confs(confs, max_iters=0)
202

    
203
    # clustering2()