Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / isolated.py @ 34a03ee1

Historique | Voir | Annoter | Télécharger (7,79 ko)

1
"""Functions to generate the conformers to be adsorbed and the most stable one.
2

3
functions:
4
confs_to_mol_list: Converts the conformers inside a rdkit mol object to a list
5
    of independent mol objects.
6
remove_C_linked_Hs: Removes hydrogens bonded to a carbon atom from a molecule.
7
gen_confs: Generate a number of conformers in random orientations.
8
get_rmsd: Gets the rmsd matrix of the conformers in a rdkit mol object.
9
get_moments_of_inertia: Computes moments of inertia of the given conformers.
10
mmff_opt_confs: Optimizes the geometry of the given conformers and returns the
11
    new mol object and the energies of its conformers.
12
run_isolated: directs the execution of functions to achieve the goal
13
"""
14
import logging
15

    
16
import numpy as np
17
from rdkit.Chem import AllChem as Chem
18

    
19
from formats import adapt_format
20

    
21
logger = logging.getLogger('DockOnSurf')
22

    
23

    
24
def confs_to_mol_list(mol: Chem.rdchem.Mol):
25
    """Converts the conformers inside a rdkit mol object to a list of
26
    separate mol objects.
27

28
    @param mol: rdkit mol object containing at least one conformer.
29
    @return list: list of separate mol objects.
30
    """
31
    return [Chem.MolFromMolBlock(Chem.MolToMolBlock(mol, confId=conf.GetId()))
32
            for conf in mol.GetConformers()]
33

    
34

    
35
def remove_C_linked_Hs(mol: Chem.rdchem.Mol):
36
    """Removes hydrogen atoms bonded to a carbon atom from a rdkit mol object.
37

38
    @param mol: rdkit mol object of the molecule with hydrogen atoms.
39
    @return: rdkit mol object of the molecule without hydrogen atoms linked to
40
    a carbon atom.
41

42
    The functions removes the hydrogen atoms bonded to carbon atoms while
43
    keeping the ones bonded to other atoms or non-bonded at all.
44
    """
45

    
46
    mol = Chem.RWMol(mol)
47
    rev_atm_idxs = [atom.GetIdx() for atom in reversed(mol.GetAtoms())]
48

    
49
    for atm_idx in rev_atm_idxs:
50
        atom = mol.GetAtomWithIdx(atm_idx)
51
        if atom.GetAtomicNum() != 1:
52
            continue
53
        for neigh in atom.GetNeighbors():
54
            if neigh.GetAtomicNum() == 6:
55
                mol.RemoveAtom(atom.GetIdx())
56
    return mol
57

    
58

    
59
def gen_confs(mol: Chem.rdchem.Mol, num_confs: int, local_min=True):
60
    """Generate conformers in random orientations.
61

62
    @param mol: rdkit mol object of the molecule to be adsorbed.
63
    @param num_confs: number of conformers to randomly generate.
64
    @param local_min: bool: if generated conformers should be a local minimum.
65
    @return: mol: rdkit mol object containing the different conformers.
66
             rmsd_mtx: triangular matrix with the rmsd values of conformers.
67

68
    Using the rdkit library, conformers are randomly generated. If structures 
69
    are required to be local minima, ie. setting the 'local_min' value to 
70
    True, a geometry optimisation using UFF is performed.
71
    """
72
    logger.debug('Generating Conformers')
73

    
74
    conf_ids = Chem.EmbedMultipleConfs(mol, numConfs=num_confs, numThreads=0)
75
    if local_min:
76
        for conf in conf_ids:
77
            Chem.UFFOptimizeMolecule(mol, confId=conf)
78

    
79
    Chem.AlignMolConformers(mol)
80
    logger.info(f'Generated {len(mol.GetConformers())} conformers')
81
    return mol
82

    
83

    
84
def get_rmsd(mol: Chem.rdchem.Mol, remove_Hs="c"):
85
    """Computes the rmsd matrix of the conformers in a rdkit mol object.
86

87
    @param mol: rdkit mol object containing at least two conformers.
88
    @param remove_Hs: bool or str,
89
    @return rmsd_matrix: Matrix containing the rmsd values of every pair of
90
    conformers.
91

92
    The RMSD values of every pair of conformers is computed, stored in matrix
93
    form and returned back. The calculation of rmsd values can take into
94
    account all hydrogens, none, or only the ones not linked to carbon atoms.
95
    """
96
    if mol.GetNumConformers() < 2:
97
        err = "The provided molecule has less than 2 conformers"
98
        logger.error(err)
99
        raise ValueError(err)
100

    
101
    if not remove_Hs:
102
        pass
103
    elif remove_Hs or remove_Hs.lower() == "all":
104
        mol = Chem.RemoveHs(mol)
105
    elif remove_Hs.lower() == "c":
106
        mol = remove_C_linked_Hs(mol)
107
    else:
108
        pass
109

    
110
    num_confs = mol.GetNumConformers()
111
    conf_ids = list(range(num_confs))
112
    rmsd_mtx = np.zeros((num_confs, num_confs))
113
    for conf1 in conf_ids:
114
        for conf2 in conf_ids[conf1 + 1:]:
115
            rmsd = Chem.GetBestRMS(mol, mol, prbId=conf2, refId=conf1)
116
            rmsd_mtx[conf1][conf2] = rmsd
117
            rmsd_mtx[conf2][conf1] = rmsd
118

    
119
    return rmsd_mtx
120

    
121

    
122
def get_moments_of_inertia(mol: Chem.rdchem.Mol):
123
    """Computes the moments of inertia of the given conformers
124

125
    @param mol: rdkit mol object of the relevant molecule.
126
    @return numpy array 2D: The inner array contains the moments of inertia for
127
    the three principal axis of a given conformer. They are ordered by its value
128
    in ascending order. The outer tuple loops over the conformers.
129
    """
130
    from rdkit.Chem.Descriptors3D import PMI1, PMI2, PMI3
131

    
132
    return np.array([[PMI(mol, confId=conf) for PMI in (PMI1, PMI2, PMI3)]
133
                     for conf in range(mol.GetNumConformers())])
134

    
135

    
136
def mmff_opt_confs(mol: Chem.rdchem.Mol, max_iters=2000):
137
    """Optimizes the geometry of the given conformers and returns the new mol
138
    object and the energies of its conformers.
139

140
    @param mol: rdkit mol object of the relevant molecule.
141
    @param max_iters: Maximum number of geometry optimization iterations. With 0
142
    a single point energy calculation is performed and only the conformer
143
    energies are returned.
144
    @return mol: rdkit mol object of the optimized molecule.
145
    @return numpy.ndarray: Array with the energies of the optimized conformers.
146

147
    The MMFF forcefield is used for the geometry optimization in its rdkit
148
    implementation. With max_iters value set to 0, a single point energy
149
    calculation is performed and only the energies are returned. For values
150
    larger than 0, if the geometry does not converge for a certain conformer,
151
    the latter is removed from the list of conformers and its energy is not
152
    included in the returned list.
153
    """
154
    from rdkit.Chem.rdForceFieldHelpers import MMFFOptimizeMoleculeConfs
155

    
156
    init_num_confs = mol.GetNumConformers()
157
    results = np.array(MMFFOptimizeMoleculeConfs(mol, numThreads=0,
158
                                                 maxIters=max_iters,
159
                                                 nonBondedThresh=10))
160

    
161
    # Remove non-converged conformers if optimization is on, ie. maxIters > 0
162
    # return all conformers if optimization is switched off, ie. maxIters = 0
163
    if max_iters > 0:
164
        for i, conv in enumerate(results[:, 0]):
165
            if conv != 0:
166
                mol.RemoveConformer(i)
167
        for i, conf in enumerate(mol.GetConformers()):
168
            conf.SetId(i)
169
        if mol.GetNumConformers() < init_num_confs:
170
            logger.warning(f'MMFF Geometry optimization did not comverge for at'
171
                           f'least one conformer. Continuing with '
172
                           f'{mol.GetNumConformers()} converged conformers')
173
        logger.info(f'Optimized conformers with MMFF.')
174
        return mol, np.array([res[1] for res in results if res[0] == 0])
175
    else:
176
        logger.info(f'Computed conformers energy with MMFF.')
177
        return np.array([res[1] for res in results])
178

    
179

    
180
def run_isolated(inp_vars):
181
    """Directs the execution of functions to obtain the conformers to adsorb
182

183
    @param inp_vars:
184
    @return:
185
    """
186
    # from clustering import *
187
    logger.info('Carrying out procedures for the isolated molecule')
188
    rd_mol = adapt_format('rdkit', inp_vars['molec_file'])
189
    confs = gen_confs(rd_mol, inp_vars['num_conformers'])
190
    rmsd_mtx = get_rmsd(confs)
191

    
192
    if 'moi' in inp_vars['cluster_magns']:
193
        confs_moi = get_moments_of_inertia(confs)
194

    
195
    if 'energy' in inp_vars['cluster_magns']:
196
        if inp_vars['min_confs']:
197
            confs, confs_eners = mmff_opt_confs(confs)
198
        else:
199
            confs_eners = mmff_opt_confs(confs, max_iters=0)
200

    
201
    # clustering2()