Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / isolated.py @ fd33bfdb

Historique | Voir | Annoter | Télécharger (8,69 ko)

1
"""Functions to generate the conformers to be adsorbed and the most stable one.
2

3
functions:
4
confs_to_mol_list: Converts the conformers inside a rdkit mol object to a list
5
    of independent mol objects.
6
remove_C_linked_Hs: Removes hydrogens bonded to a carbon atom from a molecule.
7
gen_confs: Generate a number of conformers in random orientations.
8
get_rmsd: Gets the rmsd matrix of the conformers in a rdkit mol object.
9
get_moments_of_inertia: Computes moments of inertia of the given conformers.
10
mmff_opt_confs: Optimizes the geometry of the given conformers and returns the
11
    new mol object and the energies of its conformers.
12
run_isolated: directs the execution of functions to achieve the goal
13
"""
14
import os
15
import logging
16

    
17
import numpy as np
18
from rdkit.Chem import AllChem as Chem
19

    
20
logger = logging.getLogger('DockOnSurf')
21

    
22

    
23
def confs_to_mol_list(mol: Chem.rdchem.Mol):
24
    """Converts the conformers inside a rdkit mol object to a list of
25
    separate mol objects.
26

27
    @param mol: rdkit mol object containing at least one conformer.
28
    @return list: list of separate mol objects.
29
    """
30
    return [Chem.MolFromMolBlock(Chem.MolToMolBlock(mol, confId=conf.GetId()))
31
            for conf in mol.GetConformers()]
32

    
33

    
34
def remove_C_linked_Hs(mol: Chem.rdchem.Mol):
35
    """Removes hydrogen atoms bonded to a carbon atom from a rdkit mol object.
36

37
    @param mol: rdkit mol object of the molecule with hydrogen atoms.
38
    @return: rdkit mol object of the molecule without hydrogen atoms linked to
39
    a carbon atom.
40

41
    The functions removes the hydrogen atoms bonded to carbon atoms while
42
    keeping the ones bonded to other atoms or non-bonded at all.
43
    """
44

    
45
    mol = Chem.RWMol(mol)
46
    rev_atm_idxs = [atom.GetIdx() for atom in reversed(mol.GetAtoms())]
47

    
48
    for atm_idx in rev_atm_idxs:
49
        atom = mol.GetAtomWithIdx(atm_idx)
50
        if atom.GetAtomicNum() != 1:
51
            continue
52
        for neigh in atom.GetNeighbors():
53
            if neigh.GetAtomicNum() == 6:
54
                mol.RemoveAtom(atom.GetIdx())
55
    return mol
56

    
57

    
58
def gen_confs(mol: Chem.rdchem.Mol, num_confs: int, local_min=True):
59
    """Generate conformers in random orientations.
60

61
    @param mol: rdkit mol object of the molecule to be adsorbed.
62
    @param num_confs: number of conformers to randomly generate.
63
    @param local_min: bool: if generated conformers should be a local minimum.
64
    @return: mol: rdkit mol object containing the different conformers.
65
             rmsd_mtx: triangular matrix with the rmsd values of conformers.
66

67
    Using the rdkit library, conformers are randomly generated. If structures 
68
    are required to be local minima, ie. setting the 'local_min' value to 
69
    True, a geometry optimisation using UFF is performed.
70
    """
71
    logger.debug('Generating Conformers')
72

    
73
    conf_ids = Chem.EmbedMultipleConfs(mol, numConfs=num_confs, numThreads=0)
74
    if local_min:
75
        for conf in conf_ids:
76
            Chem.UFFOptimizeMolecule(mol, confId=conf)
77

    
78
    Chem.AlignMolConformers(mol)
79
    logger.info(f'Generated {len(mol.GetConformers())} conformers')
80
    return mol
81

    
82

    
83
def get_rmsd(mol: Chem.rdchem.Mol, remove_Hs="c"):
84
    """Computes the rmsd matrix of the conformers in a rdkit mol object.
85

86
    @param mol: rdkit mol object containing at least two conformers.
87
    @param remove_Hs: bool or str,
88
    @return rmsd_matrix: Matrix containing the rmsd values of every pair of
89
    conformers.
90

91
    The RMSD values of every pair of conformers is computed, stored in matrix
92
    form and returned back. The calculation of rmsd values can take into
93
    account all hydrogens, none, or only the ones not linked to carbon atoms.
94
    """
95
    if mol.GetNumConformers() < 2:
96
        err = "The provided molecule has less than 2 conformers"
97
        logger.error(err)
98
        raise ValueError(err)
99

    
100
    if not remove_Hs:
101
        pass
102
    elif remove_Hs or remove_Hs.lower() == "all":
103
        mol = Chem.RemoveHs(mol)
104
    elif remove_Hs.lower() == "c":
105
        mol = remove_C_linked_Hs(mol)
106
    else:
107
        err = "remove_Hs value does not have an acceptable value"
108
        logger.error(err)
109
        raise ValueError(err)
110

    
111
    num_confs = mol.GetNumConformers()
112
    conf_ids = list(range(num_confs))
113
    rmsd_mtx = np.zeros((num_confs, num_confs))
114
    for conf1 in conf_ids:
115
        for conf2 in conf_ids[conf1 + 1:]:
116
            rmsd = Chem.GetBestRMS(mol, mol, prbId=conf2, refId=conf1)
117
            rmsd_mtx[conf1][conf2] = rmsd
118
            rmsd_mtx[conf2][conf1] = rmsd
119

    
120
    return rmsd_mtx
121

    
122

    
123
def get_moments_of_inertia(mol: Chem.rdchem.Mol):
124
    """Computes the moments of inertia of the given conformers
125

126
    @param mol: rdkit mol object of the relevant molecule.
127
    @return numpy array 2D: The inner array contains the moments of inertia for
128
    the three principal axis of a given conformer. They are ordered by its value
129
    in ascending order. The outer tuple loops over the conformers.
130
    """
131
    from rdkit.Chem.Descriptors3D import PMI1, PMI2, PMI3
132

    
133
    return np.array([[PMI(mol, confId=conf) for PMI in (PMI1, PMI2, PMI3)]
134
                     for conf in range(mol.GetNumConformers())])
135

    
136

    
137
def mmff_opt_confs(mol: Chem.rdchem.Mol, max_iters=2000):
138
    """Optimizes the geometry of the given conformers and returns the new mol
139
    object and the energies of its conformers.
140

141
    @param mol: rdkit mol object of the relevant molecule.
142
    @param max_iters: Maximum number of geometry optimization iterations. With 0
143
    a single point energy calculation is performed and only the conformer
144
    energies are returned.
145
    @return mol: rdkit mol object of the optimized molecule.
146
    @return numpy.ndarray: Array with the energies of the optimized conformers.
147

148
    The MMFF forcefield is used for the geometry optimization in its rdkit
149
    implementation. With max_iters value set to 0, a single point energy
150
    calculation is performed and only the energies are returned. For values
151
    larger than 0, if the geometry does not converge for a certain conformer,
152
    the latter is removed from the list of conformers and its energy is not
153
    included in the returned list.
154
    """
155
    from rdkit.Chem.rdForceFieldHelpers import MMFFOptimizeMoleculeConfs
156

    
157
    init_num_confs = mol.GetNumConformers()
158
    results = np.array(MMFFOptimizeMoleculeConfs(mol, numThreads=0,
159
                                                 maxIters=max_iters,
160
                                                 nonBondedThresh=10))
161

    
162
    # Remove non-converged conformers if optimization is on, ie. maxIters > 0
163
    # return all conformers if optimization is switched off, ie. maxIters = 0
164
    if max_iters > 0:
165
        for i, conv in enumerate(results[:, 0]):
166
            if conv != 0:
167
                mol.RemoveConformer(i)
168
        for i, conf in enumerate(mol.GetConformers()):
169
            conf.SetId(i)
170
        if mol.GetNumConformers() < init_num_confs:
171
            logger.warning(f'MMFF Geometry optimization did not comverge for at'
172
                           f'least one conformer. Continuing with '
173
                           f'{mol.GetNumConformers()} converged conformers')
174
        logger.info(f'Optimized conformers with MMFF.')
175
        return mol, np.array([res[1] for res in results if res[0] == 0])
176
    else:
177
        logger.info(f'Computed conformers energy with MMFF.')
178
        return np.array([res[1] for res in results])
179

    
180

    
181
def prep_run(inp_file, mol: Chem.rdchem.Mol, exemplars):
182
    from shutil import copy
183
    # Checking if 'isolated' directory already exists and if so backing it up.
184
    dir_name = 'isolated'
185
    bak_num = 0
186
    while dir_name in os.listdir("."):
187
        bak_num += 1
188
        dir_name = dir_name.split(".")[0]+f".bak{bak_num}"
189
    if bak_num > 0:
190
        os.rename('isolated', dir_name)
191
        logger.warning("'isolated' directory already present. Moved former "
192
                       f"directory to {dir_name}")
193
    os.mkdir('isolated')
194

    
195
    # Creating
196
    for i, conf in enumerate(exemplars):
197
        os.mkdir(f'isolated/conf_{i}')
198
        copy(inp_file, f'isolated/conf_{i}/')
199
        Chem.MolToXYZFile(mol, f'isolated/conf_{i}/coord.xyz', confId=conf)
200

    
201

    
202
def run_isolated(inp_vars):
203
    """Directs the execution of functions to obtain the conformers to adsorb
204

205
    @param inp_vars:
206
    @return:
207
    """
208
    from formats import adapt_format
209
    from clustering import clustering
210

    
211
    # from clustering import *
212
    logger.info('Carrying out procedures for the isolated molecule')
213
    rd_mol = adapt_format('rdkit', inp_vars['molec_file'])
214
    confs = gen_confs(rd_mol, inp_vars['num_conformers'])
215
    if inp_vars['min_confs']:
216
        confs, confs_ener = mmff_opt_confs(confs)
217
    rmsd_mtx = get_rmsd(confs)
218
    exemplars = clustering(rmsd_mtx)
219
    prep_run(inp_vars['isol_inp_file'], confs, exemplars)
220
    # run dft
221

    
222
    if 'moi' in inp_vars['cluster_magns']:
223
        confs_moi = get_moments_of_inertia(confs)
224

    
225
    # if 'energy' in inp_vars['cluster_magns']: