Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / formats.py @ e1c5f171

Historique | Voir | Annoter | Télécharger (9,71 ko)

1
"""Module for the interconversion between different kinds of atomic data.
2

3
functions:
4
confs_to_mol_list: Converts the conformers inside a rdkit.Mol object to a list
5
    of separate rdkit.Mol objects.
6
rdkit_mol_to_ase_atoms: Converts a rdkit.Mol object into ase.Atoms object.
7
add_special_atoms: Allows ase to use custom elements with symbols not in the
8
    periodic table.
9
adapt_format: Converts the coordinate files into a required library object type.
10
read_coords_cp2k: Reads the coordinates from a CP2K restart file and returns an
11
    ase.Atoms object.
12
collect_coords: Directs the reading of coordinates on a set of subdirectories.
13
read_energy_cp2k: Reads the CP2K out file and returns its final energy.
14
collect_energies: Directs the reading of energies on a set of subdirectories.
15
"""
16

    
17
import logging
18

    
19
import rdkit.Chem.AllChem as Chem
20

    
21
logger = logging.getLogger('DockOnSurf')
22

    
23

    
24
def confs_to_mol_list(mol: Chem.rdchem.Mol, idx_lst=None):
25
    """Converts the conformers inside a rdkit mol object to a list of
26
    separate mol objects.
27

28
    @param mol: rdkit mol object containing at least one conformer.
29
    @param idx_lst: list of conformer indices to be considered. If not passed,
30
        all conformers are considered.
31
    @return: list of separate mol objects.
32
    """
33
    if idx_lst is None:
34
        idx_lst = list(range(mol.GetNumConformers()))
35
    return [Chem.MolFromMolBlock(Chem.MolToMolBlock(mol, confId=int(idx)),
36
                                 removeHs=False) for idx in idx_lst]
37

    
38

    
39
def rdkit_mol_to_ase_atoms(mol: Chem.rdchem.Mol):
40
    """Converts a rdkit mol object into ase Atoms object.
41
    @param mol: rdkit mol object containing only one conformer.
42
    @return ase.Atoms: ase Atoms object with the same coordinates.
43
    """
44
    from ase import Atoms
45
    if mol.GetNumConformers() > 1:
46
        logger.warning('A mol object with multiple conformers is parsed, '
47
                       'converting to Atoms only the first conformer.')
48
    symbols = [atm.GetSymbol() for atm in mol.GetAtoms()]
49
    positions = mol.GetConformer(0).GetPositions()
50
    return Atoms(symbols=symbols, positions=positions)
51

    
52

    
53
def add_special_atoms(symbol_pairs):
54
    """Allows ase to use custom elements with symbols not in the periodic table.
55

56
    This function adds new chemical elements to be used by ase. Every new custom
57
    element must have a traditional (present in the periodic table) partner
58
    from which to obtain all its properties.
59
    @param symbol_pairs: List of tuples containing pairs of chemical symbols.
60
        Every tuple contains a pair of chemical symbols, the first label must be
61
        the label of the custom element and the second one the symbol of the
62
        reference one (traditional present on the periodic table).
63
    @return:
64
    """
65
    import numpy as np
66
    from ase import data
67
    for i, pair in enumerate(symbol_pairs):
68
        data.chemical_symbols += [pair[0]]
69
        z_orig = data.atomic_numbers[pair[1]]
70
        orig_iupac_mass = data.atomic_masses_iupac2016[z_orig]
71
        orig_com_mass = data.atomic_masses_common[z_orig]
72
        data.atomic_numbers[pair[0]] = max(data.atomic_numbers.values()) + 1
73
        data.atomic_names += [pair[0]]
74
        data.atomic_masses_iupac2016 = np.append(data.atomic_masses_iupac2016,
75
                                                 orig_iupac_mass)
76
        data.atomic_masses = data.atomic_masses_iupac2016
77
        data.atomic_masses_common = np.append(data.atomic_masses_common,
78
                                              orig_com_mass)
79
        data.covalent_radii = np.append(data.covalent_radii,
80
                                        data.covalent_radii[z_orig])
81
        data.reference_states += [data.reference_states[z_orig]]
82
        # TODO Add vdw_radii, gsmm and aml (smaller length)
83

    
84

    
85
def adapt_format(requirement, coord_file, spec_atms=tuple()):
86
    """Converts the coordinate files into a required library object type.
87

88
    Depending on the library required to use and the file type, it converts the
89
    coordinate file into a library-workable object.
90
    @param requirement: str, the library for which the conversion should be
91
    made. Accepted values: 'ase', 'rdkit'.
92
    @param coord_file: str, path to the coordinates file aiming to convert.
93
    Accepted file tyoes: 'xyz', 'mol'.
94
    @param spec_atms: List of tuples containing pairs of new/traditional
95
        chemical symbols.
96
    @return: an object the required library can work with.
97
    """
98
    import ase.io
99
    from ase.io.formats import filetype
100

    
101
    req_vals = ['rdkit', 'ase']
102
    file_type_vals = ['xyz', 'mol']
103
    lib_err = f"The conversion to the '{requirement}' library object type" \
104
              f" has not yet been implemented"
105
    conv_info = f"Converted {coord_file} to {requirement} object type"
106

    
107
    fil_type_err = f'The {filetype(coord_file)} file formnat is not supported'
108

    
109
    if requirement not in req_vals:
110
        logger.error(lib_err)
111
        raise NotImplementedError(lib_err)
112

    
113
    if filetype(coord_file) not in file_type_vals:
114
        logger.error(fil_type_err)
115
        raise NotImplementedError(fil_type_err)
116

    
117
    if requirement == 'rdkit':
118
        if filetype(coord_file) == 'xyz':
119
            from modules.xyz2mol import xyz2mol
120
            ase_atms = ase.io.read(coord_file)
121
            atomic_nums = ase_atms.get_atomic_numbers().tolist()
122
            xyz_coordinates = ase_atms.positions.tolist()
123
            rd_mol_obj = xyz2mol(atomic_nums, xyz_coordinates, charge=0)
124
            logger.debug(conv_info)
125
            return Chem.AddHs(rd_mol_obj)
126
        elif filetype(coord_file) == 'mol':
127
            logger.debug(conv_info)
128
            return Chem.AddHs(Chem.MolFromMolFile(coord_file, removeHs=False))
129

    
130
    if requirement == 'ase':
131
        add_special_atoms(spec_atms)
132
        if filetype(coord_file) == 'xyz':
133
            logger.debug(conv_info)
134
            return ase.io.read(coord_file)
135
        elif filetype(coord_file) == 'mol':
136
            logger.debug(conv_info)
137
            rd_mol = Chem.AddHs(Chem.MolFromMolFile(coord_file, removeHs=False))
138
            return rdkit_mol_to_ase_atoms(rd_mol)
139

    
140

    
141
def read_coords_cp2k(file, spec_atoms=tuple()):
142
    """Reads the coordinates from a CP2K restart file and returns an ase.Atoms
143
     object.
144

145
    @param file: The file to read containing the coordinates.
146
    @param spec_atoms: List of tuples containing the pairs of chemical symbols.
147
    @return: ase.Atoms object of the coordinates in the file.
148
    """
149
    import numpy as np
150
    from ase import Atoms
151
    from pycp2k import CP2K
152

    
153
    cp2k = CP2K()
154
    cp2k.parse(file)
155
    force_eval = cp2k.CP2K_INPUT.FORCE_EVAL_list[0]
156
    raw_coords = force_eval.SUBSYS.COORD.Default_keyword
157
    symbols = [atom.split()[0] for atom in raw_coords]
158
    positions = np.array([atom.split()[1:] for atom in raw_coords])
159
    if len(spec_atoms) > 0:
160
        add_special_atoms(spec_atoms)  # TODO check usage
161
    return Atoms(symbols=symbols, positions=positions)
162

    
163

    
164
def collect_coords(conf_list, code, run_type, spec_atms=tuple()):
165
    """Directs the reading of coordinates on a set of subdirectories.
166

167
    Given a dockonsurf directory hierarchy: project/run_type/conf_X
168
    (run_type = ['isolated', 'screening' or 'refinement']) with finished
169
    calculations produced by a given code, stored in every conf_X subdirectory,
170
    it collects the coordinates of the specified conf_X subdirectories in a
171
    single run_type by calling the adequate function (depending on the code) and
172
    returns a list of ase.Atoms objects.
173

174
    @param conf_list: List of directories where to read the coords from.
175
    @param code: the code that produced the calculation results files.
176
    @param run_type: the type of calculation (and also the name of the folder)
177
                     containing the calculation subdirectories.
178
    @param spec_atms: List of tuples containing pairs of new/traditional
179
        chemical symbols.
180
    @return: list of ase.Atoms objects.
181
    """
182
    from glob import glob
183
    atoms_list = []
184
    for conf in conf_list:
185
        if code == 'cp2k':
186
            atoms_list.append(read_coords_cp2k(glob(f"{run_type}/{conf}/*-1"
187
                                                    f".restart")[0], spec_atms))
188
        # elif code == 'vasp'
189
    return atoms_list
190

    
191

    
192
def read_energy_cp2k(file):
193
    """Reads the CP2K out file and returns its final energy.
194

195
    @param file: The file from which the energy should be read.
196
    @return: The last energy on the out file.
197
    """
198
    out_fh = open(file, 'r')
199
    energy = None
200
    for line in out_fh:
201
        if "ENERGY| Total FORCE_EVAL ( QS ) energy (a.u.):" in line:
202
            energy = float(line.strip().split(':')[1]) * 27.2113845  # Ha to eV
203
    out_fh.close()
204
    return energy
205

    
206

    
207
def collect_energies(conf_list, code, run_type):
208
    """Directs the reading of energies on a set of subdirectories.
209

210
    Given a dockonsurf directory hierarchy: project/run_type/conf_X
211
    (run_type = ['isolated', 'screening' or 'refinement']) with finished
212
    calculations produced by a given code, stored in every conf_X subdirectory,
213
    it collects the energies of the specified conf_X subdirectories in a
214
    single run_type by calling the adequate function (depending on the code) and
215
    returns a list of energies.
216

217
    @param conf_list: List of directories where to read the energy.
218
    @param code: The code that produced the calculation output files.
219
    @param run_type: The type of calculation (and also the name of the folder)
220
                     containing the calculation subdirectories.
221
    @return: List of energies
222
    """
223
    from glob import glob
224
    import numpy as np
225

    
226
    energies = []
227
    for conf in conf_list:
228
        if code == 'cp2k':
229
            energies.append(read_energy_cp2k(
230
                glob(f"{run_type}/{conf}/*.out")[0]))
231

    
232
    if len(energies) == 0:
233
        err = f"No results found on {run_type}"
234
        logger.error(err)
235
        raise FileNotFoundError(err)
236

    
237
    return np.array(energies)