Révision 92924d24 modules/isolated.py

b/modules/isolated.py
1 1
"""Functions to generate the conformers to be adsorbed and the most stable one.
2 2

  
3 3
functions:
4
confs_to_mol_list: Converts the conformers inside a rdkit mol object to a list
5
    of independent mol objects.
4 6
remove_C_linked_Hs: Removes hydrogens bonded to a carbon atom from a molecule.
5
gen_confs: Generate a number of different conformers in random orientations.
7
gen_confs: Generate a number of conformers in random orientations.
8
get_rmsd: Gets the rmsd matrix of the conformers in a rdkit mol object.
9
get_moments_of_inertia: Computes moments of inertia of the given conformers.
10
mmff_opt_confs: Optimizes the geometry of the given conformers and returns the
11
    new mol object and the energies of its conformers.
6 12
run_isolated: directs the execution of functions to achieve the goal
7 13
"""
8 14
import logging
9 15

  
16
import numpy as np
17
from rdkit.Chem import AllChem as Chem
18

  
10 19
from formats import adapt_format
11 20

  
12 21
logger = logging.getLogger('DockOnSurf')
13 22

  
14 23

  
15
def remove_C_linked_Hs(mol):
24
def confs_to_mol_list(mol: Chem.rdchem.Mol):
25
    """Converts the conformers inside a rdkit mol object to a list of
26
    separate mol objects.
27

  
28
    @param mol: rdkit mol object containing at least one conformer.
29
    @return list: list of separate mol objects.
30
    """
31
    return [Chem.MolFromMolBlock(Chem.MolToMolBlock(mol, confId=conf.GetId()))
32
            for conf in mol.GetConformers()]
33

  
34

  
35
def remove_C_linked_Hs(mol: Chem.rdchem.Mol):
16 36
    """Removes hydrogen atoms bonded to a carbon atom from a rdkit mol object.
17 37

  
18
    @param mol: rdkit mol object of the molecule with hydrogens.
19
    @return: rdkit mol object of the molecule without hydrogens linked to carbon
20
    atoms.
21
    The functions removes the hydrogens bonded to C atoms while keeping the
22
    ones bonded to heteroatoms.
38
    @param mol: rdkit mol object of the molecule with hydrogen atoms.
39
    @return: rdkit mol object of the molecule without hydrogen atoms linked to
40
    a carbon atom.
41

  
42
    The functions removes the hydrogen atoms bonded to carbon atoms while
43
    keeping the ones bonded to other atoms or non-bonded at all.
23 44
    """
24
    from rdkit import Chem
25 45

  
26 46
    mol = Chem.RWMol(mol)
27 47
    rev_atm_idxs = [atom.GetIdx() for atom in reversed(mol.GetAtoms())]
......
36 56
    return mol
37 57

  
38 58

  
39
def gen_confs(mol, num_confs, rmsd_thld, local_min=True):
40
    """Generate a number of different conformers in random orientations.
59
def gen_confs(mol: Chem.rdchem.Mol, num_confs: int, local_min=True):
60
    """Generate conformers in random orientations.
41 61

  
42 62
    @param mol: rdkit mol object of the molecule to be adsorbed.
43
    @param num_confs: number of raw conformers to randomly generate.
44
    @param rmsd_thld: rmsd threshold to consider two conformers as different.
63
    @param num_confs: number of conformers to randomly generate.
45 64
    @param local_min: bool: if generated conformers should be a local minimum.
46 65
    @return: mol: rdkit mol object containing the different conformers.
66
             rmsd_mtx: triangular matrix with the rmsd values of conformers.
47 67

  
48
    Using the rdkit library, conformers are randomly generated.
49
    If structures are required to be local minima, ie. setting the 'local_min'
50
    value to True, a geometry optimisation using UFF is performed.
51
    Identical conformers are removed when, by evaluating the RMSD of
52
    two structures without carbon-bonded hydrogens, their value lies below the
53
    'rmsd_thld'.
54
    The resulting conformers are returned back formatted as rdkit mol object.
68
    Using the rdkit library, conformers are randomly generated. If structures 
69
    are required to be local minima, ie. setting the 'local_min' value to 
70
    True, a geometry optimisation using UFF is performed.
55 71
    """
56
    from rdkit.Chem import AllChem as Chem
57
    logger.info('Generating Conformers')
72
    logger.debug('Generating Conformers')
58 73

  
59 74
    conf_ids = Chem.EmbedMultipleConfs(mol, numConfs=num_confs, numThreads=0)
60 75
    if local_min:
......
62 77
            Chem.UFFOptimizeMolecule(mol, confId=conf)
63 78

  
64 79
    Chem.AlignMolConformers(mol)
65
    mol_wo_Hs = remove_C_linked_Hs(mol)
80
    logger.info(f'Generated {len(mol.GetConformers())} conformers')
81
    return mol
82

  
83
    
84
def get_rmsd(mol: Chem.rdchem.Mol, remove_Hs="c"):
85
    """Computes the rmsd matrix of the conformers in a rdkit mol object.
86

  
87
    @param mol: rdkit mol object containing at least two conformers.
88
    @param remove_Hs: bool or str,
89
    @return rmsd_matrix: Matrix containing the rmsd values of every pair of
90
    conformers.
91

  
92
    The RMSD values of every pair of conformers is computed, stored in matrix
93
    form and returned back. The calculation of rmsd values can take into
94
    account all hydrogens, none, or only the ones not linked to carbon atoms.
95
    """
96
    if mol.GetNumConformers() < 2:
97
        err = "The provided molecule has less than 2 conformers"
98
        logger.error(err)
99
        raise ValueError(err)
100

  
101
    if not remove_Hs:
102
        pass
103
    elif remove_Hs or remove_Hs.lower() == "all":
104
        mol = Chem.RemoveHs(mol)
105
    elif remove_Hs.lower() == "c":
106
        mol = remove_C_linked_Hs(mol)
107
    else:
108
        pass
109

  
110
    num_confs = mol.GetNumConformers()
111
    conf_ids = list(range(num_confs))
112
    rmsd_mtx = np.zeros((num_confs, num_confs))
66 113
    for conf1 in conf_ids:
67 114
        for conf2 in conf_ids[conf1 + 1:]:
68
            try:
69
                rmsd = Chem.GetBestRMS(mol_wo_Hs, mol_wo_Hs, prbId=conf2,
70
                                      refId=conf1)
71
            except ValueError:
72
                continue
73
            else:
74
                if rmsd <= rmsd_thld:
75
                    mol.RemoveConformer(conf2)
115
            rmsd = Chem.GetBestRMS(mol, mol, prbId=conf2, refId=conf1)
116
            rmsd_mtx[conf1][conf2] = rmsd
117
            rmsd_mtx[conf2][conf1] = rmsd
118

  
119
    return rmsd_mtx
76 120

  
121

  
122
def get_moments_of_inertia(mol: Chem.rdchem.Mol):
123
    """Computes the moments of inertia of the given conformers
124

  
125
    @param mol: rdkit mol object of the relevant molecule.
126
    @return numpy array 2D: The inner array contains the moments of inertia for
127
    the three principal axis of a given conformer. They are ordered by its value
128
    in ascending order. The outer tuple loops over the conformers.
129
    """
130
    from rdkit.Chem.Descriptors3D import PMI1, PMI2, PMI3
131

  
132
    return np.array([[PMI(mol, confId=conf) for PMI in (PMI1, PMI2, PMI3)]
133
                     for conf in range(mol.GetNumConformers())])
134

  
135

  
136
def mmff_opt_confs(mol: Chem.rdchem.Mol):
137
    """Optimizes the geometry of the given conformers and returns the new mol
138
    object and the energies of its conformers.
139

  
140
    @param mol: rdkit mol object of the relevant molecule.
141
    @return mol: rdkit mol object of the optimized molecule.
142
    @return numpy.ndarray: Array with the energies of the optimized conformers.
143

  
144
    The MMFF forcefield is used for the geometry optimization in its rdkit
145
    implementation. If the geometry does not converge for a certain conformer,
146
    the latter is removed from the list of conformers and its energy is not
147
    included in the returned list.
148
    """
149
    from rdkit.Chem.rdForceFieldHelpers import MMFFOptimizeMoleculeConfs
150

  
151
    init_num_confs = mol.GetNumConformers()
152
    results = np.array(MMFFOptimizeMoleculeConfs(mol, numThreads=0,
153
                                                maxIters=2000,
154
                                                nonBondedThresh=10))
155
    for i, conv in enumerate(results[:,0]):
156
        if conv != 0:
157
            mol.RemoveConformer(i)
77 158
    for i, conf in enumerate(mol.GetConformers()):
78 159
        conf.SetId(i)
160
    if mol.GetNumConformers() < init_num_confs:
161
        logger.warning(f'MMFF Geometry optimization did not comverge for at '
162
                       f'least one conformer. Continuing with '
163
                       f'{mol.GetNumConformers()} converged conformers')
79 164

  
80
    logger.info(f'Generated {len(mol.GetConformers())} different conformers')
81
    return mol
165
    return mol, np.array([res[1] for res in results if res[0] == 0])
82 166

  
83 167

  
84 168
def run_isolated(inp_vars):
......
89 173
    """
90 174
    logger.info('Carrying out procedures for the isolated molecule')
91 175
    rd_mol = adapt_format('rdkit', inp_vars['molec_file'])
92
    confs = gen_confs(rd_mol, inp_vars['num_conformers'], inp_vars['iso_rmsd'])
176
    confs = gen_confs(rd_mol, inp_vars['num_conformers'])
177

  
178

  
93 179

  

Formats disponibles : Unified diff