Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / isolated.py @ 695dcff8

Historique | Voir | Annoter | Télécharger (6,25 ko)

1
"""Functions to generate the conformers to be adsorbed and the most stable one.
2

3
functions:
4
remove_C_linked_Hs: Removes hydrogens bonded to a carbon atom from a molecule.
5
gen_confs: Generate a number of conformers in random orientations.
6
get_rmsd: Gets the rmsd matrix of the conformers in a rdkit mol object.
7
get_moments_of_inertia: Computes moments of inertia of the given conformers.
8
mmff_opt_confs: Optimizes the geometry of the given conformers and returns the
9
    new mol object and the energies of its conformers.
10
run_isolated: directs the execution of functions to achieve the goal
11
"""
12
import os
13
import logging
14

    
15
import numpy as np
16
import rdkit.Chem.AllChem as Chem
17

    
18
logger = logging.getLogger('DockOnSurf')
19

    
20

    
21
def remove_C_linked_Hs(mol: Chem.rdchem.Mol):
22
    """Removes hydrogen atoms bonded to a carbon atom from a rdkit mol object.
23

24
    @param mol: rdkit mol object of the molecule with hydrogen atoms.
25
    @return: rdkit mol object of the molecule without hydrogen atoms linked to
26
    a carbon atom.
27

28
    The functions removes the hydrogen atoms bonded to carbon atoms while
29
    keeping the ones bonded to other atoms or non-bonded at all.
30
    """
31

    
32
    mol = Chem.RWMol(mol)
33
    rev_atm_idxs = [atom.GetIdx() for atom in reversed(mol.GetAtoms())]
34

    
35
    for atm_idx in rev_atm_idxs:
36
        atom = mol.GetAtomWithIdx(atm_idx)
37
        if atom.GetAtomicNum() != 1:
38
            continue
39
        for neigh in atom.GetNeighbors():
40
            if neigh.GetAtomicNum() == 6:
41
                mol.RemoveAtom(atom.GetIdx())
42
    return mol
43

    
44

    
45
def gen_confs(mol: Chem.rdchem.Mol, num_confs: int, local_min=True):
46
    """Generate conformers in random orientations.
47

48
    @param mol: rdkit mol object of the molecule to be adsorbed.
49
    @param num_confs: number of conformers to randomly generate.
50
    @param local_min: bool: if generated conformers should be a local minimum.
51
    @return: mol: rdkit mol object containing the different conformers.
52
             rmsd_mtx: Matrix with the rmsd values of conformers.
53

54
    Using the rdkit library, conformers are randomly generated. If structures 
55
    are required to be local minima, ie. setting the 'local_min' value to 
56
    True, a geometry optimisation using UFF is performed.
57
    """
58
    logger.debug('Generating Conformers.')
59

    
60
    mol = Chem.AddHs(mol)
61
    Chem.EmbedMultipleConfs(mol, numConfs=num_confs, numThreads=0)
62
    Chem.AlignMolConformers(mol)
63
    logger.info(f'Generated {len(mol.GetConformers())} conformers.')
64
    return mol
65

    
66

    
67
def get_moments_of_inertia(mol: Chem.rdchem.Mol):  # TODO Rethink its usage
68
    """Computes the moments of inertia of the given conformers
69

70
    @param mol: rdkit mol object of the relevant molecule.
71
    @return numpy array 2D: The inner array contains the moments of inertia for
72
    the three principal axis of a given conformer. They are ordered by its value
73
    in ascending order. The outer tuple loops over the conformers.
74
    """
75
    from rdkit.Chem.Descriptors3D import PMI1, PMI2, PMI3
76

    
77
    return np.array([[PMI(mol, confId=conf) for PMI in (PMI1, PMI2, PMI3)]
78
                     for conf in range(mol.GetNumConformers())])
79

    
80

    
81
def mmff_opt_confs(mol: Chem.rdchem.Mol, max_iters=2000):
82
    """Optimizes the geometry of the given conformers and returns the new mol
83
    object and the energies of its conformers.
84

85
    @param mol: rdkit mol object of the relevant molecule.
86
    @param max_iters: Maximum number of geometry optimization iterations. With 0
87
    a single point energy calculation is performed and only the conformer
88
    energies are returned.
89
    @return mol: rdkit mol object of the optimized molecule.
90
    @return numpy.ndarray: Array with the energies of the optimized conformers.
91

92
    The MMFF forcefield is used for the geometry optimization in its rdkit
93
    implementation. With max_iters value set to 0, a single point energy
94
    calculation is performed and only the energies are returned. For values
95
    larger than 0, if the geometry does not converge for a certain conformer,
96
    the latter is removed from the list of conformers and its energy is not
97
    included in the returned list.
98
    """
99
    from rdkit.Chem.rdForceFieldHelpers import MMFFOptimizeMoleculeConfs
100

    
101
    init_num_confs = mol.GetNumConformers()
102
    results = np.array(MMFFOptimizeMoleculeConfs(mol, numThreads=0,
103
                                                 maxIters=max_iters,
104
                                                 nonBondedThresh=10))
105

    
106
    # Remove non-converged conformers if optimization is on, ie. maxIters > 0
107
    # return all conformers if optimization is switched off, ie. maxIters = 0
108
    if max_iters > 0:
109
        for i, conv in enumerate(results[:, 0]):
110
            if conv != 0:
111
                mol.RemoveConformer(i)
112
        for i, conf in enumerate(mol.GetConformers()):
113
            conf.SetId(i)
114
        if mol.GetNumConformers() < init_num_confs:
115
            logger.warning(f'MMFF Geometry optimization did not comverge for at'
116
                           f'least one conformer. Continuing with '
117
                           f'{mol.GetNumConformers()} converged conformers.')
118
        logger.info(f'Pre-optimized conformers with MMFF.')
119
        return mol, np.array([res[1] for res in results if res[0] == 0])
120
    else:
121
        logger.info(f'Computed conformers energy with MMFF.')
122
        return np.array([res[1] for res in results])
123

    
124

    
125
def run_isolated(inp_vars):
126
    """Directs the execution of functions to obtain the conformers to adsorb
127

128
    @param inp_vars: Calculation parameters from input file.
129
    @return:
130
    """
131
    from modules.formats import adapt_format, confs_to_mol_list, \
132
        rdkit_mol_to_ase_atoms
133
    from modules.clustering import clustering, get_rmsd
134
    from modules.calculation import run_calc
135

    
136
    logger.info('Carrying out procedures for the isolated molecule.')
137
    rd_mol = adapt_format('rdkit', inp_vars['molec_file'])
138
    confs = gen_confs(rd_mol, inp_vars['num_conformers'])
139
    if inp_vars['min_confs']:
140
        confs, confs_ener = mmff_opt_confs(confs)
141
    else:
142
        confs_ener = mmff_opt_confs(confs, max_iters=0)
143
    conf_list = confs_to_mol_list(confs)
144
    rmsd_mtx = get_rmsd(conf_list)
145
    confs_moi = get_moments_of_inertia(confs)
146
    exemplars = clustering(rmsd_mtx)
147
    mol_list = confs_to_mol_list(confs, exemplars)
148
    ase_atms_list = [rdkit_mol_to_ase_atoms(mol) for mol in mol_list]
149
    run_calc('isolated', inp_vars, ase_atms_list)