Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / isolated.py @ f3004731

Historique | Voir | Annoter | Télécharger (7,78 ko)

1
"""Functions to generate the conformers to be adsorbed and the most stable one.
2

3
functions:
4
confs_to_mol_list: Converts the conformers inside a rdkit mol object to a list
5
    of independent mol objects.
6
remove_C_linked_Hs: Removes hydrogens bonded to a carbon atom from a molecule.
7
gen_confs: Generate a number of conformers in random orientations.
8
get_rmsd: Gets the rmsd matrix of the conformers in a rdkit mol object.
9
get_moments_of_inertia: Computes moments of inertia of the given conformers.
10
mmff_opt_confs: Optimizes the geometry of the given conformers and returns the
11
    new mol object and the energies of its conformers.
12
run_isolated: directs the execution of functions to achieve the goal
13
"""
14
import os
15
import logging
16

    
17
import numpy as np
18
from rdkit.Chem import AllChem as Chem
19

    
20
logger = logging.getLogger('DockOnSurf')
21

    
22

    
23
def remove_C_linked_Hs(mol: Chem.rdchem.Mol):
24
    """Removes hydrogen atoms bonded to a carbon atom from a rdkit mol object.
25

26
    @param mol: rdkit mol object of the molecule with hydrogen atoms.
27
    @return: rdkit mol object of the molecule without hydrogen atoms linked to
28
    a carbon atom.
29

30
    The functions removes the hydrogen atoms bonded to carbon atoms while
31
    keeping the ones bonded to other atoms or non-bonded at all.
32
    """
33

    
34
    mol = Chem.RWMol(mol)
35
    rev_atm_idxs = [atom.GetIdx() for atom in reversed(mol.GetAtoms())]
36

    
37
    for atm_idx in rev_atm_idxs:
38
        atom = mol.GetAtomWithIdx(atm_idx)
39
        if atom.GetAtomicNum() != 1:
40
            continue
41
        for neigh in atom.GetNeighbors():
42
            if neigh.GetAtomicNum() == 6:
43
                mol.RemoveAtom(atom.GetIdx())
44
    return mol
45

    
46

    
47
def gen_confs(mol: Chem.rdchem.Mol, num_confs: int, local_min=True):
48
    """Generate conformers in random orientations.
49

50
    @param mol: rdkit mol object of the molecule to be adsorbed.
51
    @param num_confs: number of conformers to randomly generate.
52
    @param local_min: bool: if generated conformers should be a local minimum.
53
    @return: mol: rdkit mol object containing the different conformers.
54
             rmsd_mtx: triangular matrix with the rmsd values of conformers.
55

56
    Using the rdkit library, conformers are randomly generated. If structures 
57
    are required to be local minima, ie. setting the 'local_min' value to 
58
    True, a geometry optimisation using UFF is performed.
59
    """
60
    logger.debug('Generating Conformers')
61

    
62
    Chem.EmbedMultipleConfs(mol, numConfs=num_confs, numThreads=0)
63
    Chem.AlignMolConformers(mol)
64
    logger.info(f'Generated {len(mol.GetConformers())} conformers')
65
    return mol
66

    
67

    
68
def get_rmsd(mol: Chem.rdchem.Mol, remove_Hs="c"):
69
    """Computes the rmsd matrix of the conformers in a rdkit mol object.
70

71
    @param mol: rdkit mol object containing at least two conformers.
72
    @param remove_Hs: bool or str,
73
    @return rmsd_matrix: Matrix containing the rmsd values of every pair of
74
    conformers.
75

76
    The RMSD values of every pair of conformers is computed, stored in matrix
77
    form and returned back. The calculation of rmsd values can take into
78
    account all hydrogens, none, or only the ones not linked to carbon atoms.
79
    """
80
    if mol.GetNumConformers() < 2:
81
        err = "The provided molecule has less than 2 conformers"
82
        logger.error(err)
83
        raise ValueError(err)
84

    
85
    if not remove_Hs:
86
        pass
87
    elif remove_Hs or remove_Hs.lower() == "all":
88
        mol = Chem.RemoveHs(mol)
89
    elif remove_Hs.lower() == "c":
90
        mol = remove_C_linked_Hs(mol)
91
    else:
92
        err = "remove_Hs value does not have an acceptable value"
93
        logger.error(err)
94
        raise ValueError(err)
95

    
96
    num_confs = mol.GetNumConformers()
97
    conf_ids = list(range(num_confs))
98
    rmsd_mtx = np.zeros((num_confs, num_confs))
99
    for conf1 in conf_ids:
100
        for conf2 in conf_ids[conf1 + 1:]:
101
            rmsd = Chem.GetBestRMS(mol, mol, prbId=conf2, refId=conf1)
102
            rmsd_mtx[conf1][conf2] = rmsd
103
            rmsd_mtx[conf2][conf1] = rmsd
104

    
105
    return rmsd_mtx
106

    
107

    
108
def get_moments_of_inertia(mol: Chem.rdchem.Mol):
109
    """Computes the moments of inertia of the given conformers
110

111
    @param mol: rdkit mol object of the relevant molecule.
112
    @return numpy array 2D: The inner array contains the moments of inertia for
113
    the three principal axis of a given conformer. They are ordered by its value
114
    in ascending order. The outer tuple loops over the conformers.
115
    """
116
    from rdkit.Chem.Descriptors3D import PMI1, PMI2, PMI3
117

    
118
    return np.array([[PMI(mol, confId=conf) for PMI in (PMI1, PMI2, PMI3)]
119
                     for conf in range(mol.GetNumConformers())])
120

    
121

    
122
def mmff_opt_confs(mol: Chem.rdchem.Mol, max_iters=2000):
123
    """Optimizes the geometry of the given conformers and returns the new mol
124
    object and the energies of its conformers.
125

126
    @param mol: rdkit mol object of the relevant molecule.
127
    @param max_iters: Maximum number of geometry optimization iterations. With 0
128
    a single point energy calculation is performed and only the conformer
129
    energies are returned.
130
    @return mol: rdkit mol object of the optimized molecule.
131
    @return numpy.ndarray: Array with the energies of the optimized conformers.
132

133
    The MMFF forcefield is used for the geometry optimization in its rdkit
134
    implementation. With max_iters value set to 0, a single point energy
135
    calculation is performed and only the energies are returned. For values
136
    larger than 0, if the geometry does not converge for a certain conformer,
137
    the latter is removed from the list of conformers and its energy is not
138
    included in the returned list.
139
    """
140
    from rdkit.Chem.rdForceFieldHelpers import MMFFOptimizeMoleculeConfs
141

    
142
    init_num_confs = mol.GetNumConformers()
143
    results = np.array(MMFFOptimizeMoleculeConfs(mol, numThreads=0,
144
                                                 maxIters=max_iters,
145
                                                 nonBondedThresh=10))
146

    
147
    # Remove non-converged conformers if optimization is on, ie. maxIters > 0
148
    # return all conformers if optimization is switched off, ie. maxIters = 0
149
    if max_iters > 0:
150
        for i, conv in enumerate(results[:, 0]):
151
            if conv != 0:
152
                mol.RemoveConformer(i)
153
        for i, conf in enumerate(mol.GetConformers()):
154
            conf.SetId(i)
155
        if mol.GetNumConformers() < init_num_confs:
156
            logger.warning(f'MMFF Geometry optimization did not comverge for at'
157
                           f'least one conformer. Continuing with '
158
                           f'{mol.GetNumConformers()} converged conformers')
159
        logger.info(f'Optimized conformers with MMFF.')
160
        return mol, np.array([res[1] for res in results if res[0] == 0])
161
    else:
162
        logger.info(f'Computed conformers energy with MMFF.')
163
        return np.array([res[1] for res in results])
164

    
165

    
166
def run_isolated(inp_vars):
167
    """Directs the execution of functions to obtain the conformers to adsorb
168

169
    @param inp_vars: Calculation parameters from input file.
170
    @return:
171
    """
172
    from modules.formats import adapt_format, confs_to_mol_list, \
173
        rdkit_mol_to_ase_atoms
174
    from modules.clustering import clustering
175
    from modules.calculation import run_calc
176

    
177
    logger.info('Carrying out procedures for the isolated molecule')
178
    rd_mol = adapt_format('rdkit', inp_vars['molec_file'])
179
    confs = gen_confs(rd_mol, inp_vars['num_conformers'])
180
    if inp_vars['min_confs']:
181
        confs, confs_ener = mmff_opt_confs(confs)
182
    else:
183
        confs_ener = mmff_opt_confs(confs, max_iters=0)
184
    rmsd_mtx = get_rmsd(confs)
185
    confs_moi = get_moments_of_inertia(confs)
186
    exemplars = clustering(rmsd_mtx)
187
    mol_list = confs_to_mol_list(confs, exemplars)
188
    ase_atms_list = [rdkit_mol_to_ase_atoms(mol) for mol in mol_list]
189
    run_calc('isolated', inp_vars, ase_atms_list)
190

    
191
    if 'moi' in inp_vars['cluster_magns']:
192
        confs_moi = get_moments_of_inertia(confs)
193

    
194
    # if 'energy' in inp_vars['cluster_magns']: