Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / isolated.py @ 8279a51d

Historique | Voir | Annoter | Télécharger (6,87 ko)

1
"""Functions to generate the conformers to be adsorbed and the most stable one.
2

3
functions:
4
remove_C_linked_Hs: Removes hydrogens bonded to a carbon atom from a molecule.
5
gen_confs: Generate a number of conformers in random orientations.
6
get_rmsd: Gets the rmsd matrix of the conformers in a rdkit mol object.
7
get_moments_of_inertia: Computes moments of inertia of the given conformers.
8
mmff_opt_confs: Optimizes the geometry of the given conformers and returns the
9
    new mol object and the energies of its conformers.
10
run_isolated: directs the execution of functions to achieve the goal
11
"""
12
import logging
13

    
14
import numpy as np
15
import rdkit.Chem.AllChem as Chem
16

    
17
logger = logging.getLogger('DockOnSurf')
18

    
19

    
20
def remove_C_linked_Hs(mol: Chem.rdchem.Mol):
21
    """Removes hydrogen atoms bonded to a carbon atom from a rdkit mol object.
22

23
    @param mol: rdkit mol object of the molecule with hydrogen atoms.
24
    @return: rdkit mol object of the molecule without hydrogen atoms linked to
25
    a carbon atom.
26

27
    The functions removes the hydrogen atoms bonded to carbon atoms while
28
    keeping the ones bonded to other atoms or non-bonded at all.
29
    """
30

    
31
    mol = Chem.RWMol(mol)
32
    rev_atm_idxs = [atom.GetIdx() for atom in reversed(mol.GetAtoms())]
33

    
34
    for atm_idx in rev_atm_idxs:
35
        atom = mol.GetAtomWithIdx(atm_idx)
36
        if atom.GetAtomicNum() != 1:
37
            continue
38
        for neigh in atom.GetNeighbors():
39
            if neigh.GetAtomicNum() == 6:
40
                mol.RemoveAtom(atom.GetIdx())
41
    return mol
42

    
43

    
44
def gen_confs(mol: Chem.rdchem.Mol, num_confs: int):
45
    """Generate conformers in random orientations.
46

47
    @param mol: rdkit mol object of the molecule to be adsorbed.
48
    @param num_confs: number of conformers to randomly generate.
49
    @return: mol: rdkit mol object containing the different conformers.
50
             rmsd_mtx: Matrix with the rmsd values of conformers.
51

52
    Using the rdkit library, conformers are randomly generated. If structures 
53
    are required to be local minima, ie. setting the 'local_min' value to 
54
    True, a geometry optimisation using UFF is performed.
55
    """
56
    logger.debug('Generating Conformers.')
57

    
58
    mol = Chem.AddHs(mol)
59
    Chem.EmbedMultipleConfs(mol, numConfs=num_confs, numThreads=0)
60
    Chem.AlignMolConformers(mol)
61
    logger.info(f'Generated {len(mol.GetConformers())} conformers.')
62
    return mol
63

    
64

    
65
def get_moments_of_inertia(mol: Chem.rdchem.Mol):  # TODO Rethink its usage
66
    """Computes the moments of inertia of the given conformers
67

68
    @param mol: rdkit mol object of the relevant molecule.
69
    @return numpy array 2D: The inner array contains the moments of inertia for
70
    the three principal axis of a given conformer. They are ordered by its value
71
    in ascending order. The outer tuple loops over the conformers.
72
    """
73
    from rdkit.Chem.Descriptors3D import PMI1, PMI2, PMI3
74

    
75
    return np.array([[PMI(mol, confId=conf) for PMI in (PMI1, PMI2, PMI3)]
76
                     for conf in range(mol.GetNumConformers())])
77

    
78

    
79
def pre_opt_confs(mol: Chem.rdchem.Mol, force_field='MMFF', max_iters=2000):
80
    """Optimizes the geometry of the given conformers and returns the new mol
81
    object and the energies of its conformers.
82

83
    @param mol: rdkit mol object of the relevant molecule.
84
    @param force_field: Force Field to use for the pre-optimization.
85
    @param max_iters: Maximum number of geometry optimization iterations. With 0
86
    a single point energy calculation is performed and only the conformer
87
    energies are returned.
88
    @return mol: rdkit mol object of the optimized molecule.
89
    @return numpy.ndarray: Array with the energies of the optimized conformers.
90

91
    The MMFF and UFF force fields can be used for the geometry optimization in
92
    their rdkit implementation. With max_iters value set to 0, a single point
93
    energy calculation is performed and only the energies are returned. For
94
    values larger than 0, if the geometry does not converge for a certain
95
    conformer, the latter is removed from the list of conformers and its energy
96
    is not included in the returned list.
97
    """
98
    from rdkit.Chem.rdForceFieldHelpers import MMFFOptimizeMoleculeConfs, \
99
        UFFOptimizeMoleculeConfs
100

    
101
    init_num_confs = mol.GetNumConformers()
102
    if force_field == 'mmff':
103
        results = np.array(MMFFOptimizeMoleculeConfs(mol, numThreads=0,
104
                                                     maxIters=max_iters,
105
                                                     nonBondedThresh=10))
106
    elif force_field == 'uff':
107
        results = np.array(UFFOptimizeMoleculeConfs(mol, numThreads=0,
108
                                                    maxIters=max_iters,
109
                                                    vdwThresh=10))
110
    else:
111
        logger.error("force_field parameter must be 'MMFF' or 'UFF'")
112
        raise ValueError("force_field parameter must be 'MMFF' or 'UFF'")
113

    
114
    # Remove non-converged conformers if optimization is on, ie. maxIters > 0
115
    # return all conformers if optimization is switched off, ie. maxIters = 0
116
    if max_iters > 0:
117
        for i, conv in enumerate(results[:, 0]):
118
            if conv != 0:
119
                mol.RemoveConformer(i)
120
        for i, conf in enumerate(mol.GetConformers()):
121
            conf.SetId(i)
122
        if mol.GetNumConformers() < init_num_confs:
123
            logger.warning(f'Geometry optimization did not comverge for at'
124
                           f'least one conformer. Continuing with '
125
                           f'{mol.GetNumConformers()} converged conformers.')
126
        logger.info(f'Pre-optimized conformers with {force_field}.')
127
        return mol, np.array([res[1] for res in results if res[0] == 0])
128
    else:
129
        logger.info(f'Computed conformers energy with {force_field}.')
130
        return np.array([res[1] for res in results])
131

    
132

    
133
def run_isolated(inp_vars):
134
    """Directs the execution of functions to obtain the conformers to adsorb
135

136
    @param inp_vars: Calculation parameters from input file.
137
    @return:
138
    """
139
    from modules.formats import adapt_format, confs_to_mol_list, \
140
        rdkit_mol_to_ase_atoms
141
    from modules.clustering import clustering, get_rmsd
142
    from modules.calculation import run_calc
143

    
144
    logger.info('Carrying out procedures for the isolated molecule.')
145
    rd_mol = adapt_format('rdkit', inp_vars['molec_file'],
146
                          inp_vars['special_atoms'])
147
    confs = gen_confs(rd_mol, inp_vars['num_conformers'])
148
    if inp_vars['pre_opt']:
149
        confs, confs_ener = pre_opt_confs(confs, inp_vars['pre_opt'])
150
    else:
151
        confs_ener = pre_opt_confs(confs, max_iters=0)
152
    conf_list = confs_to_mol_list(confs)
153
    rmsd_mtx = get_rmsd(conf_list)
154
    confs_moi = get_moments_of_inertia(confs)
155
    exemplars = clustering(rmsd_mtx)
156
    mol_list = confs_to_mol_list(confs, exemplars)
157
    ase_atms_list = [rdkit_mol_to_ase_atoms(mol) for mol in mol_list]
158
    run_calc('isolated', inp_vars, ase_atms_list)
159
    logger.info('Finished the procedures for the isolated molecule section.')