Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / isolated.py @ 07edc24f

Historique | Voir | Annoter | Télécharger (6,91 ko)

1
"""Functions to generate the conformers to be adsorbed and the most stable one.
2

3
functions:
4
remove_C_linked_Hs: Removes hydrogens bonded to a carbon atom from a molecule.
5
gen_confs: Generate a number of conformers in random orientations.
6
get_rmsd: Gets the rmsd matrix of the conformers in a rdkit mol object.
7
get_moments_of_inertia: Computes moments of inertia of the given conformers.
8
mmff_opt_confs: Optimizes the geometry of the given conformers and returns the
9
    new mol object and the energies of its conformers.
10
run_isolated: directs the execution of functions to achieve the goal
11
"""
12
import logging
13

    
14
import numpy as np
15
import rdkit.Chem.AllChem as Chem
16

    
17
logger = logging.getLogger('DockOnSurf')
18

    
19

    
20
def remove_C_linked_Hs(mol: Chem.rdchem.Mol):
21
    """Removes hydrogen atoms bonded to a carbon atom from a rdkit mol object.
22

23
    @param mol: rdkit mol object of the molecule with hydrogen atoms.
24
    @return: rdkit mol object of the molecule without hydrogen atoms linked to
25
    a carbon atom.
26

27
    The functions removes the hydrogen atoms bonded to carbon atoms while
28
    keeping the ones bonded to other atoms or non-bonded at all.
29
    """
30

    
31
    mol = Chem.RWMol(mol)
32
    rev_atm_idxs = [atom.GetIdx() for atom in reversed(mol.GetAtoms())]
33

    
34
    for atm_idx in rev_atm_idxs:
35
        atom = mol.GetAtomWithIdx(atm_idx)
36
        if atom.GetAtomicNum() != 1:
37
            continue
38
        for neigh in atom.GetNeighbors():
39
            if neigh.GetAtomicNum() == 6:
40
                mol.RemoveAtom(atom.GetIdx())
41
    return mol
42

    
43

    
44
def gen_confs(mol: Chem.rdchem.Mol, num_confs: int):
45
    """Generate conformers in random orientations.
46

47
    @param mol: rdkit mol object of the molecule to be adsorbed.
48
    @param num_confs: number of conformers to randomly generate.
49
    @return: mol: rdkit mol object containing the different conformers.
50
             rmsd_mtx: Matrix with the rmsd values of conformers.
51

52
    Using the rdkit library, conformers are randomly generated. If structures 
53
    are required to be local minima, ie. setting the 'local_min' value to 
54
    True, a geometry optimisation using UFF is performed.
55
    """
56
    logger.debug('Generating Conformers.')
57

    
58
    mol = Chem.AddHs(mol)
59
    Chem.EmbedMultipleConfs(mol, numConfs=num_confs, numThreads=0,
60
                            randomSeed=5817216)
61
    Chem.AlignMolConformers(mol)
62
    logger.info(f'Generated {len(mol.GetConformers())} conformers.')
63
    return mol
64

    
65

    
66
def get_moments_of_inertia(mol: Chem.rdchem.Mol):  # TODO Rethink its usage
67
    """Computes the moments of inertia of the given conformers
68

69
    @param mol: rdkit mol object of the relevant molecule.
70
    @return numpy array 2D: The inner array contains the moments of inertia for
71
    the three principal axis of a given conformer. They are ordered by its value
72
    in ascending order. The outer tuple loops over the conformers.
73
    """
74
    from rdkit.Chem.Descriptors3D import PMI1, PMI2, PMI3
75

    
76
    return np.array([[PMI(mol, confId=conf) for PMI in (PMI1, PMI2, PMI3)]
77
                     for conf in range(mol.GetNumConformers())])
78

    
79

    
80
def pre_opt_confs(mol: Chem.rdchem.Mol, force_field='MMFF', max_iters=2000):
81
    """Optimizes the geometry of the given conformers and returns the new mol
82
    object and the energies of its conformers.
83

84
    @param mol: rdkit mol object of the relevant molecule.
85
    @param force_field: Force Field to use for the pre-optimization.
86
    @param max_iters: Maximum number of geometry optimization iterations. With 0
87
    a single point energy calculation is performed and only the conformer
88
    energies are returned.
89
    @return mol: rdkit mol object of the optimized molecule.
90
    @return numpy.ndarray: Array with the energies of the optimized conformers.
91

92
    The MMFF and UFF force fields can be used for the geometry optimization in
93
    their rdkit implementation. With max_iters value set to 0, a single point
94
    energy calculation is performed and only the energies are returned. For
95
    values larger than 0, if the geometry does not converge for a certain
96
    conformer, the latter is removed from the list of conformers and its energy
97
    is not included in the returned list.
98
    """
99
    from rdkit.Chem.rdForceFieldHelpers import MMFFOptimizeMoleculeConfs, \
100
        UFFOptimizeMoleculeConfs
101

    
102
    init_num_confs = mol.GetNumConformers()
103
    if force_field == 'mmff':
104
        results = np.array(MMFFOptimizeMoleculeConfs(mol, numThreads=0,
105
                                                     maxIters=max_iters,
106
                                                     nonBondedThresh=10))
107
    elif force_field == 'uff':
108
        results = np.array(UFFOptimizeMoleculeConfs(mol, numThreads=0,
109
                                                    maxIters=max_iters,
110
                                                    vdwThresh=10))
111
    else:
112
        logger.error("force_field parameter must be 'MMFF' or 'UFF'")
113
        raise ValueError("force_field parameter must be 'MMFF' or 'UFF'")
114

    
115
    # Remove non-converged conformers if optimization is on, ie. maxIters > 0
116
    # return all conformers if optimization is switched off, ie. maxIters = 0
117
    if max_iters > 0:
118
        for i, conv in enumerate(results[:, 0]):
119
            if conv != 0:
120
                mol.RemoveConformer(i)
121
        for i, conf in enumerate(mol.GetConformers()):
122
            conf.SetId(i)
123
        if mol.GetNumConformers() < init_num_confs:
124
            logger.warning(f'Geometry optimization did not comverge for at'
125
                           f'least one conformer. Continuing with '
126
                           f'{mol.GetNumConformers()} converged conformers.')
127
        logger.info(f'Pre-optimized conformers with {force_field}.')
128
        return mol, np.array([res[1] for res in results if res[0] == 0])
129
    else:
130
        logger.info(f'Computed conformers energy with {force_field}.')
131
        return np.array([res[1] for res in results])
132

    
133

    
134
def run_isolated(inp_vars):
135
    """Directs the execution of functions to obtain the conformers to adsorb
136

137
    @param inp_vars: Calculation parameters from input file.
138
    @return:
139
    """
140
    from modules.formats import adapt_format, confs_to_mol_list, \
141
        rdkit_mol_to_ase_atoms
142
    from modules.clustering import clustering, get_rmsd
143
    from modules.calculation import run_calc
144

    
145
    logger.info('Carrying out procedures for the isolated molecule.')
146
    rd_mol = adapt_format('rdkit', inp_vars['molec_file'],
147
                          inp_vars['special_atoms'])
148
    confs = gen_confs(rd_mol, inp_vars['num_conformers'])
149
    if inp_vars['pre_opt']:
150
        confs, confs_ener = pre_opt_confs(confs, inp_vars['pre_opt'])
151
    else:
152
        confs_ener = pre_opt_confs(confs, max_iters=0)
153
    conf_list = confs_to_mol_list(confs)
154
    rmsd_mtx = get_rmsd(conf_list)
155
    confs_moi = get_moments_of_inertia(confs)
156
    exemplars = clustering(rmsd_mtx)
157
    mol_list = confs_to_mol_list(confs, exemplars)
158
    ase_atms_list = [rdkit_mol_to_ase_atoms(mol) for mol in mol_list]
159
    run_calc('isolated', inp_vars, ase_atms_list)
160
    logger.info('Finished the procedures for the isolated molecule section.')