Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / formats.py @ 5fb01677

Historique | Voir | Annoter | Télécharger (6,22 ko)

1
"""Module for the conversion between atomic coordinates files and objects
2

3
functions:
4
confs_to_mol_list: Converts the conformers inside a rdkit mol object to a list
5
    of separate mol objects.
6
rdkit_mol_to_ase_atoms: Converts a rdkit mol object into ase Atoms object.
7
adapt_format: Converts the coordinate files into a required library object type.
8
read_coords: Reads the atomic coordinates resulting from finished calculations.
9
"""
10

    
11
import logging
12

    
13
import rdkit.Chem.AllChem as Chem
14

    
15
logger = logging.getLogger('DockOnSurf')
16

    
17

    
18
def confs_to_mol_list(mol: Chem.rdchem.Mol, idx_lst=None):
19
    """Converts the conformers inside a rdkit mol object to a list of
20
    separate mol objects.
21

22
    @param mol: rdkit mol object containing at least one conformer.
23
    @param idx_lst: list of conformer indices to be considered. If not passed,
24
        all conformers are considered.
25
    @return: list of separate mol objects.
26
    """
27
    if idx_lst is None:
28
        idx_lst = list(range(mol.GetNumConformers()))
29
    return [Chem.MolFromMolBlock(Chem.MolToMolBlock(mol, confId=int(idx)),
30
                                 removeHs=False) for idx in idx_lst]
31

    
32

    
33
def rdkit_mol_to_ase_atoms(mol: Chem.rdchem.Mol):
34
    """Converts a rdkit mol object into ase Atoms object.
35
    @param mol: rdkit mol object containing only one conformer.
36
    @return ase.Atoms: ase Atoms object with the same coordinates.
37
    """
38
    from ase import Atoms
39
    if mol.GetNumConformers() > 1:
40
        logger.warning('A mol object with multiple conformers is parsed, '
41
                       'converting to Atoms only the first conformer')
42
    symbols = [atm.GetSymbol() for atm in mol.GetAtoms()]
43
    positions = mol.GetConformer(0).GetPositions()
44
    return Atoms(symbols=symbols, positions=positions)
45

    
46

    
47
def adapt_format(requirement, coord_file):
48
    """Converts the coordinate files into a required library object type.
49

50
    Depending on the library required to use and the file type, it converts the
51
    coordinate file into a library-workable object.
52
    @param requirement: str, the library for which the conversion should be
53
    made. Accepted values: 'ase', 'rdkit'.
54
    @param coord_file: str, path to the coordinates file aiming to convert.
55
    Accepted file tyoes: 'xyz', 'mol'.
56
    @return: an object the required library can work with.
57
    """
58
    import ase.io
59
    from ase.io.formats import filetype
60

    
61
    req_vals = ['rdkit', 'ase']
62
    file_type_vals = ['xyz', 'mol']
63
    lib_err = f"The conversion to the '{requirement}' library object type" \
64
              f" has not yet been implemented"
65
    conv_info = f"Converted {coord_file} to {requirement} object type"
66

    
67
    fil_type_err = f'The {filetype(coord_file)} file formnat is not supported'
68

    
69
    if requirement not in req_vals:
70
        logger.error(lib_err)
71
        raise NotImplementedError(lib_err)
72

    
73
    if filetype(coord_file) not in file_type_vals:
74
        logger.error(fil_type_err)
75
        raise NotImplementedError(fil_type_err)
76

    
77
    if requirement == 'rdkit':
78
        if filetype(coord_file) == 'xyz':
79
            from modules.xyz2mol import xyz2mol
80
            ase_atms = ase.io.read(coord_file)
81
            atomic_nums = ase_atms.get_atomic_numbers().tolist()
82
            xyz_coordinates = ase_atms.positions.tolist()
83
            # TODO Add routine to read charge
84
            rd_mol_obj = xyz2mol(atomic_nums, xyz_coordinates, charge=0)
85
            logger.debug(conv_info)
86
            return Chem.AddHs(rd_mol_obj)
87
        elif filetype(coord_file) == 'mol':
88
            logger.debug(conv_info)
89
            return Chem.AddHs(Chem.MolFromMolFile(coord_file, removeHs=False))
90

    
91
    if requirement == 'ase':
92
        if filetype(coord_file) == 'xyz':
93
            logger.debug(conv_info)
94
            return ase.io.read(coord_file)
95
        elif filetype(coord_file) == 'mol':
96
            logger.debug(conv_info)
97
            rd_mol = Chem.AddHs(Chem.MolFromMolFile(coord_file, removeHs=False))
98
            return rdkit_mol_to_ase_atoms(rd_mol)
99

    
100

    
101
def read_coords(code, run_type, req):
102
    """Reads the atomic coordinates resulting from finished calculations.
103

104
    Given a run_type ('isolated', 'screening' or 'refinement') directory
105
    containing different subdirectories with finished calculations in every
106
    subdirectory, it reads, from each subirectory, the final coordinates
107
    resulting from the calculation and returns a list of objects adequate to the
108
    required library.
109

110
    @param code: the code that produced the calculation results files.
111
    @param run_type: the type of calculation (and also the name of the folder)
112
                     containing the calculation subdirectories.
113
    @param req: The required library object type to make the list of (eg. rdkit,
114
                ase)
115
    @return: list of collection-of-atoms objects. (rdkit.Mol, ase.Atoms, etc.)
116
    """
117
    import os
118
    if code == 'cp2k':
119
        pattern = '-pos-1.xyz'
120
    else:
121
        pattern = ''
122
    return [adapt_format(req, f'{run_type}/{conf}/{fil}')
123
            for conf in os.listdir(run_type)
124
            for fil in os.listdir(f"{run_type}/{conf}") if pattern in fil]
125

    
126

    
127
def read_energies(code, run_type):
128
    """Reads the energies resulting from finished calculations.
129

130
    Given a run_type ('isolated', 'screening' or 'refinement') directory
131
    containing different subdirectories with finished calculations in every
132
    subdirectory, it reads the final energies of calculations inside each
133
    subdirectory.
134

135
    @param code: the code that produced the calculation results files.
136
    @param run_type: the type of calculation (and also the name of the folder)
137
                     containing the calculation subdirectories.
138
    @return: list of energies
139
    """
140
    import os
141
    import numpy as np
142
    from modules.utilities import tail
143

    
144
    energies = []
145
    if code == 'cp2k':
146
        pattern = '-pos-1.xyz'
147
        for conf in os.listdir(run_type):
148
            for fil in os.listdir(f"{run_type}/{conf}"):
149
                if pattern in fil:
150
                    traj_fh = open(f"{run_type}/{conf}/{fil}", 'rb')
151
                    num_atoms = int(traj_fh.readline().strip())
152
                    last_geo = tail(traj_fh, num_atoms + 2).splitlines()
153
                    for line in last_geo:
154
                        if 'E =' in line:
155
                            energies.append(float(line.split('E =')[1]))
156

    
157
    return np.array(energies)