Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / formats.py @ f8c4eafe

Historique | Voir | Annoter | Télécharger (5,97 ko)

1
"""Module for the conversion between atomic coordinates files and objects
2

3
functions:
4
confs_to_mol_list: Converts the conformers inside a rdkit mol object to a list
5
    of separate mol objects.
6
rdkit_mol_to_ase_atoms: Converts a rdkit mol object into ase Atoms object.
7
adapt_format: Converts the coordinate files into a required library object type.
8
read_coords: Reads the atomic coordinates resulting from finished calculations.
9
"""
10

    
11
import logging
12

    
13
import rdkit.Chem.AllChem as Chem
14

    
15
logger = logging.getLogger('DockOnSurf')
16

    
17

    
18
def confs_to_mol_list(mol: Chem.rdchem.Mol, idx_lst=None):
19
    """Converts the conformers inside a rdkit mol object to a list of
20
    separate mol objects.
21

22
    @param mol: rdkit mol object containing at least one conformer.
23
    @param idx_lst: list of conformer indices to be considered. If not passed,
24
        all conformers are considered.
25
    @return: list of separate mol objects.
26
    """
27
    if idx_lst is None:
28
        idx_lst = list(range(mol.GetNumConformers()))
29
    return [Chem.MolFromMolBlock(Chem.MolToMolBlock(mol, confId=int(idx)),
30
                                 removeHs=False) for idx in idx_lst]
31

    
32

    
33
def rdkit_mol_to_ase_atoms(mol: Chem.rdchem.Mol):
34
    """Converts a rdkit mol object into ase Atoms object.
35
    @param mol: rdkit mol object containing only one conformer.
36
    @return ase.Atoms: ase Atoms object with the same coordinates.
37
    """
38
    from ase import Atoms
39
    if mol.GetNumConformers() > 1:
40
        logger.warning('A mol object with multiple conformers is parsed, '
41
                       'converting to Atoms only the first conformer')
42
    symbols = [atm.GetSymbol() for atm in mol.GetAtoms()]
43
    positions = mol.GetConformer(0).GetPositions()
44
    return Atoms(symbols=symbols, positions=positions)
45

    
46

    
47
def adapt_format(requirement, coord_file):
48
    """Converts the coordinate files into a required library object type.
49

50
    Depending on the library required to use and the file type, it converts the
51
    coordinate file into a library-workable object.
52
    @param requirement: str, the library for which the conversion should be
53
    made. Accepted values: 'ase', 'rdkit'.
54
    @param coord_file: str, path to the coordinates file aiming to convert.
55
    Accepted file tyoes: 'xyz', 'mol'.
56
    @return: an object the required library can work with.
57
    """
58
    import ase.io
59
    from ase.io.formats import filetype
60

    
61
    req_vals = ['rdkit', 'ase']
62
    file_type_vals = ['xyz', 'mol']
63
    lib_err = f"The conversion to the '{requirement}' library object type" \
64
              f" has not yet been implemented"
65
    conv_info = f"Converted {coord_file} to {requirement} object type"
66

    
67
    fil_type_err = f'The {filetype(coord_file)} file formnat is not supported'
68

    
69
    if requirement not in req_vals:
70
        logger.error(lib_err)
71
        raise NotImplementedError(lib_err)
72

    
73
    if filetype(coord_file) not in file_type_vals:
74
        logger.error(fil_type_err)
75
        raise NotImplementedError(fil_type_err)
76

    
77
    if requirement == 'rdkit':
78
        if filetype(coord_file) == 'xyz':
79
            from xyz2mol import xyz2mol
80
            ase_atms = ase.io.read(coord_file)
81
            atomic_nums = ase_atms.get_atomic_numbers().tolist()
82
            xyz_coordinates = ase_atms.positions.tolist()
83
            # TODO Add routine to read charge
84
            rd_mol_obj = xyz2mol(atomic_nums, xyz_coordinates, charge=0)
85
            logger.debug(conv_info)
86
            return rd_mol_obj
87
        elif filetype(coord_file) == 'mol':
88
            from rdkit.Chem import MolFromMolFile
89
            logger.debug(conv_info)
90
            return MolFromMolFile(coord_file, removeHs=False)
91

    
92
    if requirement == 'ase':
93
        logger.debug(conv_info)
94
        return ase.io.read(coord_file)
95

    
96

    
97
def read_coords(code, run_type, req):
98
    """Reads the atomic coordinates resulting from finished calculations.
99

100
    Given a run_type ('isolated', 'screening' or 'refinement') directory
101
    containing different subdirectories with finished calculations in every
102
    subdirectory, it reads, from each subirectory, the final coordinates
103
    resulting from the calculation and returns a list of objects adequate to the
104
    required library.
105

106
    @param code: the code that produced the calculation results files.
107
    @param run_type: the type of calculation (and also the name of the folder)
108
                     containing the calculation subdirectories.
109
    @param req: The required library object type to make the list of (eg. rdkit,
110
                ase)
111
    @return: list of collection-of-atoms objects. (rdkit.Mol, ase.Atoms, etc.)
112
    """
113
    import os
114
    if code == 'cp2k':
115
        pattern = '-pos-1.xyz'
116
    else:
117
        pattern = ''
118
    return [adapt_format(req, f'{run_type}/{conf}/{fil}')
119
            for conf in os.listdir(run_type)
120
            for fil in os.listdir(f"{run_type}/{conf}") if pattern in fil]
121

    
122

    
123
def read_energies(code, run_type):
124
    """Reads the energies resulting from finished calculations.
125

126
    Given a run_type ('isolated', 'screening' or 'refinement') directory
127
    containing different subdirectories with finished calculations in every
128
    subdirectory, it reads the final energies of calculations inside each
129
    subdirectory.
130

131
    @param code: the code that produced the calculation results files.
132
    @param run_type: the type of calculation (and also the name of the folder)
133
                     containing the calculation subdirectories.
134
    @return: list of energies
135
    """
136
    import os
137
    import numpy as np
138
    from utilities import tail
139

    
140
    energies = []
141
    if code == 'cp2k':
142
        pattern = '-pos-1.xyz'
143
        for conf in os.listdir(run_type):
144
            for fil in os.listdir(f"{run_type}/{conf}"):
145
                if pattern in fil:
146
                    traj_fh = open(f"{run_type}/{conf}/{fil}", 'rb')
147
                    num_atoms = int(traj_fh.readline().strip())
148
                    last_geo = tail(traj_fh, num_atoms + 2).splitlines()
149
                    for line in last_geo:
150
                        if 'E =' in line:
151
                            energies.append(float(line.split('E =')[1]))
152

    
153
    return np.array(energies)