Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / formats.py @ 19567be2

Historique | Voir | Annoter | Télécharger (10,51 ko)

1 fd2384fc Carles Marti
"""Module for the interconversion between different kinds of atomic data.
2 e23f119b Carles

3 e23f119b Carles
functions:
4 fd2384fc Carles Marti
confs_to_mol_list: Converts the conformers inside a rdkit.Mol object to a list
5 fd2384fc Carles Marti
    of separate rdkit.Mol objects.
6 fd2384fc Carles Marti
rdkit_mol_to_ase_atoms: Converts a rdkit.Mol object into ase.Atoms object.
7 fd2384fc Carles Marti
add_special_atoms: Allows ase to use custom elements with symbols not in the
8 fd2384fc Carles Marti
    periodic table.
9 b6f47f2d Carles
adapt_format: Converts the coordinate files into a required library object type.
10 fd2384fc Carles Marti
read_coords_cp2k: Reads the coordinates from a CP2K restart file and returns an
11 fd2384fc Carles Marti
    ase.Atoms object.
12 fd2384fc Carles Marti
collect_coords: Directs the reading of coordinates on a set of subdirectories.
13 fd2384fc Carles Marti
read_energy_cp2k: Reads the CP2K out file and returns its final energy.
14 fd2384fc Carles Marti
collect_energies: Directs the reading of energies on a set of subdirectories.
15 e23f119b Carles
"""
16 e23f119b Carles
17 e23f119b Carles
import logging
18 f3004731 Carles
19 f3004731 Carles
import rdkit.Chem.AllChem as Chem
20 f3004731 Carles
21 89a980fc Carles
logger = logging.getLogger('DockOnSurf')
22 e23f119b Carles
23 e23f119b Carles
24 f3004731 Carles
def confs_to_mol_list(mol: Chem.rdchem.Mol, idx_lst=None):
25 f3004731 Carles
    """Converts the conformers inside a rdkit mol object to a list of
26 f3004731 Carles
    separate mol objects.
27 f3004731 Carles

28 f3004731 Carles
    @param mol: rdkit mol object containing at least one conformer.
29 9510666f Carles
    @param idx_lst: list of conformer indices to be considered. If not passed,
30 9510666f Carles
        all conformers are considered.
31 f3004731 Carles
    @return: list of separate mol objects.
32 f3004731 Carles
    """
33 f3004731 Carles
    if idx_lst is None:
34 f3004731 Carles
        idx_lst = list(range(mol.GetNumConformers()))
35 f02e613b Carles Marti
    return [Chem.MolFromMolBlock(
36 f02e613b Carles Marti
        Chem.MolToMolBlock(mol, confId=int(idx)).replace("3D", ""),
37 f02e613b Carles Marti
        removeHs=False) for idx in idx_lst]
38 f3004731 Carles
39 f3004731 Carles
40 f3004731 Carles
def rdkit_mol_to_ase_atoms(mol: Chem.rdchem.Mol):
41 f3004731 Carles
    """Converts a rdkit mol object into ase Atoms object.
42 f3004731 Carles
    @param mol: rdkit mol object containing only one conformer.
43 f3004731 Carles
    @return ase.Atoms: ase Atoms object with the same coordinates.
44 f3004731 Carles
    """
45 f3004731 Carles
    from ase import Atoms
46 4933cb8a Carles Martí
    if mol.GetNumConformers() > 1:
47 9510666f Carles
        logger.warning('A mol object with multiple conformers is parsed, '
48 695dcff8 Carles Marti
                       'converting to Atoms only the first conformer.')
49 f3004731 Carles
    symbols = [atm.GetSymbol() for atm in mol.GetAtoms()]
50 f3004731 Carles
    positions = mol.GetConformer(0).GetPositions()
51 f3004731 Carles
    return Atoms(symbols=symbols, positions=positions)
52 f3004731 Carles
53 f3004731 Carles
54 cc92f6ee Carles Marti
def add_special_atoms(symbol_pairs):
55 fd2384fc Carles Marti
    """Allows ase to use custom elements with symbols not in the periodic table.
56 cc92f6ee Carles Marti

57 cc92f6ee Carles Marti
    This function adds new chemical elements to be used by ase. Every new custom
58 cc92f6ee Carles Marti
    element must have a traditional (present in the periodic table) partner
59 cc92f6ee Carles Marti
    from which to obtain all its properties.
60 fd2384fc Carles Marti
    @param symbol_pairs: List of tuples containing pairs of chemical symbols.
61 cc92f6ee Carles Marti
        Every tuple contains a pair of chemical symbols, the first label must be
62 cc92f6ee Carles Marti
        the label of the custom element and the second one the symbol of the
63 cc92f6ee Carles Marti
        reference one (traditional present on the periodic table).
64 cc92f6ee Carles Marti
    @return:
65 587dca22 Carles Marti
    """  # TODO Enable special atoms for rdkit
66 cc92f6ee Carles Marti
    import numpy as np
67 cc92f6ee Carles Marti
    from ase import data
68 cc92f6ee Carles Marti
    for i, pair in enumerate(symbol_pairs):
69 cc92f6ee Carles Marti
        data.chemical_symbols += [pair[0]]
70 cc92f6ee Carles Marti
        z_orig = data.atomic_numbers[pair[1]]
71 cc92f6ee Carles Marti
        orig_iupac_mass = data.atomic_masses_iupac2016[z_orig]
72 cc92f6ee Carles Marti
        orig_com_mass = data.atomic_masses_common[z_orig]
73 cc92f6ee Carles Marti
        data.atomic_numbers[pair[0]] = max(data.atomic_numbers.values()) + 1
74 cc92f6ee Carles Marti
        data.atomic_names += [pair[0]]
75 cc92f6ee Carles Marti
        data.atomic_masses_iupac2016 = np.append(data.atomic_masses_iupac2016,
76 cc92f6ee Carles Marti
                                                 orig_iupac_mass)
77 cc92f6ee Carles Marti
        data.atomic_masses = data.atomic_masses_iupac2016
78 cc92f6ee Carles Marti
        data.atomic_masses_common = np.append(data.atomic_masses_common,
79 cc92f6ee Carles Marti
                                              orig_com_mass)
80 cc92f6ee Carles Marti
        data.covalent_radii = np.append(data.covalent_radii,
81 cc92f6ee Carles Marti
                                        data.covalent_radii[z_orig])
82 cc92f6ee Carles Marti
        data.reference_states += [data.reference_states[z_orig]]
83 90819cc3 Carles Marti
        # TODO Add vdw_radii, gsmm and aml (smaller length)
84 cc92f6ee Carles Marti
85 cc92f6ee Carles Marti
86 90819cc3 Carles Marti
def adapt_format(requirement, coord_file, spec_atms=tuple()):
87 e23f119b Carles
    """Converts the coordinate files into a required library object type.
88 e23f119b Carles

89 e23f119b Carles
    Depending on the library required to use and the file type, it converts the
90 e23f119b Carles
    coordinate file into a library-workable object.
91 e23f119b Carles
    @param requirement: str, the library for which the conversion should be
92 e23f119b Carles
    made. Accepted values: 'ase', 'rdkit'.
93 e23f119b Carles
    @param coord_file: str, path to the coordinates file aiming to convert.
94 e23f119b Carles
    Accepted file tyoes: 'xyz', 'mol'.
95 90819cc3 Carles Marti
    @param spec_atms: List of tuples containing pairs of new/traditional
96 90819cc3 Carles Marti
        chemical symbols.
97 e23f119b Carles
    @return: an object the required library can work with.
98 748a6036 Carles Martí
    """
99 439ce5f7 Carles
    import ase.io
100 8ab593ee Carles
    from ase.io.formats import filetype
101 8ab593ee Carles
102 748a6036 Carles Martí
    from modules.utilities import try_command
103 748a6036 Carles Martí
104 8ab593ee Carles
    req_vals = ['rdkit', 'ase']
105 4381145e Carles
    lib_err = f"The conversion to the '{requirement}' library object type" \
106 4381145e Carles
              f" has not yet been implemented"
107 4381145e Carles
    conv_info = f"Converted {coord_file} to {requirement} object type"
108 4381145e Carles
109 748a6036 Carles Martí
    fil_type_err = f'The {filetype(coord_file)} file format is not supported'
110 4381145e Carles
111 4381145e Carles
    if requirement not in req_vals:
112 9f7bb440 Carles
        logger.error(lib_err)
113 4381145e Carles
        raise NotImplementedError(lib_err)
114 4381145e Carles
115 8ab593ee Carles
    if requirement == 'rdkit':
116 748a6036 Carles Martí
        from modules.xyz2mol import xyz2mol
117 748a6036 Carles Martí
        if filetype(coord_file) == 'xyz':  # TODO Include detection of charges.
118 439ce5f7 Carles
            ase_atms = ase.io.read(coord_file)
119 439ce5f7 Carles
            atomic_nums = ase_atms.get_atomic_numbers().tolist()
120 439ce5f7 Carles
            xyz_coordinates = ase_atms.positions.tolist()
121 748a6036 Carles Martí
            rd_mol_obj = xyz2mol(atomic_nums, xyz_coordinates)
122 8d08beb4 Carles
            logger.debug(conv_info)
123 21e2cca5 Carles Marti
            return Chem.AddHs(rd_mol_obj)
124 8ab593ee Carles
        elif filetype(coord_file) == 'mol':
125 8d08beb4 Carles
            logger.debug(conv_info)
126 21e2cca5 Carles Marti
            return Chem.AddHs(Chem.MolFromMolFile(coord_file, removeHs=False))
127 748a6036 Carles Martí
        else:
128 748a6036 Carles Martí
            ase_atms = try_command(ase.io.read,
129 748a6036 Carles Martí
                                   [(ase.io.formats.UnknownFileTypeError,
130 748a6036 Carles Martí
                                     fil_type_err)],
131 748a6036 Carles Martí
                                   coord_file)
132 748a6036 Carles Martí
            atomic_nums = ase_atms.get_atomic_numbers().tolist()
133 748a6036 Carles Martí
            xyz_coordinates = ase_atms.positions.tolist()
134 748a6036 Carles Martí
            return xyz2mol(atomic_nums, xyz_coordinates)
135 8ab593ee Carles
136 8ab593ee Carles
    if requirement == 'ase':
137 cc92f6ee Carles Marti
        add_special_atoms(spec_atms)
138 21e2cca5 Carles Marti
        if filetype(coord_file) == 'xyz':
139 21e2cca5 Carles Marti
            logger.debug(conv_info)
140 21e2cca5 Carles Marti
            return ase.io.read(coord_file)
141 21e2cca5 Carles Marti
        elif filetype(coord_file) == 'mol':
142 21e2cca5 Carles Marti
            logger.debug(conv_info)
143 21e2cca5 Carles Marti
            rd_mol = Chem.AddHs(Chem.MolFromMolFile(coord_file, removeHs=False))
144 21e2cca5 Carles Marti
            return rdkit_mol_to_ase_atoms(rd_mol)
145 748a6036 Carles Martí
        else:
146 748a6036 Carles Martí
            return try_command(ase.io.read,
147 748a6036 Carles Martí
                               [(ase.io.formats.UnknownFileTypeError,
148 748a6036 Carles Martí
                                 fil_type_err)],
149 748a6036 Carles Martí
                               coord_file)
150 b6f47f2d Carles
151 b6f47f2d Carles
152 ed01b5b7 Carles Marti
def read_coords_cp2k(file, spec_atoms=tuple()):
153 ed01b5b7 Carles Marti
    """Reads the coordinates from a CP2K restart file and returns an ase.Atoms
154 ed01b5b7 Carles Marti
     object.
155 ed01b5b7 Carles Marti

156 ed01b5b7 Carles Marti
    @param file: The file to read containing the coordinates.
157 ed01b5b7 Carles Marti
    @param spec_atoms: List of tuples containing the pairs of chemical symbols.
158 ed01b5b7 Carles Marti
    @return: ase.Atoms object of the coordinates in the file.
159 ed01b5b7 Carles Marti
    """
160 ed01b5b7 Carles Marti
    import numpy as np
161 ed01b5b7 Carles Marti
    from ase import Atoms
162 ed01b5b7 Carles Marti
    from pycp2k import CP2K
163 ed01b5b7 Carles Marti
164 ed01b5b7 Carles Marti
    cp2k = CP2K()
165 ed01b5b7 Carles Marti
    cp2k.parse(file)
166 ed01b5b7 Carles Marti
    force_eval = cp2k.CP2K_INPUT.FORCE_EVAL_list[0]
167 ed01b5b7 Carles Marti
    raw_coords = force_eval.SUBSYS.COORD.Default_keyword
168 ed01b5b7 Carles Marti
    symbols = [atom.split()[0] for atom in raw_coords]
169 1d8c374e Carles Martí
    positions = np.array([[float(coord) for coord in atom.split()[1:]]
170 1d8c374e Carles Martí
                          for atom in raw_coords])
171 ed01b5b7 Carles Marti
    if len(spec_atoms) > 0:
172 4387ce69 Carles Marti
        add_special_atoms(spec_atoms)
173 ed01b5b7 Carles Marti
    return Atoms(symbols=symbols, positions=positions)
174 ed01b5b7 Carles Marti
175 ed01b5b7 Carles Marti
176 cdc1edbe Carles Martí
def read_coords_vasp(file, spec_atoms=tuple()):
177 cdc1edbe Carles Martí
    """Reads the coordinates from VASP OUTCAR and returns an ase.Atoms object.
178 cdc1edbe Carles Martí

179 cdc1edbe Carles Martí
    @param file: The file to read containing the coordinates.
180 cdc1edbe Carles Martí
    @param spec_atoms: List of tuples containing the pairs of chemical symbols.
181 cdc1edbe Carles Martí
    @return: ase.Atoms object of the coordinates in the file.
182 cdc1edbe Carles Martí
    """
183 cdc1edbe Carles Martí
    import os
184 cdc1edbe Carles Martí
    import ase.io
185 cdc1edbe Carles Martí
    if not os.path.isfile(file):
186 cdc1edbe Carles Martí
        err_msg = f"File {file} not found."
187 cdc1edbe Carles Martí
        logger.error(err_msg)
188 cdc1edbe Carles Martí
        raise FileNotFoundError(err_msg)
189 cdc1edbe Carles Martí
    if len(spec_atoms) > 0:
190 cdc1edbe Carles Martí
        add_special_atoms(spec_atoms)
191 cdc1edbe Carles Martí
    return ase.io.read(file)
192 cdc1edbe Carles Martí
193 cdc1edbe Carles Martí
194 ed01b5b7 Carles Marti
def read_energy_cp2k(file):
195 1d8c374e Carles Martí
    """Reads the CP2K output file and returns its final energy.
196 ed01b5b7 Carles Marti

197 ed01b5b7 Carles Marti
    @param file: The file from which the energy should be read.
198 1d8c374e Carles Martí
    @return: The last energy on the out file in eV.
199 ed01b5b7 Carles Marti
    """
200 ed01b5b7 Carles Marti
    out_fh = open(file, 'r')
201 ed01b5b7 Carles Marti
    energy = None
202 ed01b5b7 Carles Marti
    for line in out_fh:
203 ed01b5b7 Carles Marti
        if "ENERGY| Total FORCE_EVAL ( QS ) energy (a.u.):" in line:
204 9371fd91 Carles Marti
            energy = float(line.strip().split(':')[1]) * 27.2113845  # Ha to eV
205 ed01b5b7 Carles Marti
    out_fh.close()
206 ed01b5b7 Carles Marti
    return energy
207 ed01b5b7 Carles Marti
208 ed01b5b7 Carles Marti
209 1d8c374e Carles Martí
def collect_confs(dir_list, code, run_type, spec_atms=tuple()):
210 1d8c374e Carles Martí
    """Reads the coordinates and energies of a list of finished calculations.
211 f8c4eafe Carles

212 fd2384fc Carles Marti
    Given a dockonsurf directory hierarchy: project/run_type/conf_X
213 1d8c374e Carles Martí
    (run_type = ['isolated', 'screening' or 'refinement']) it reads the
214 1d8c374e Carles Martí
    coordinates of each conf_X, it assigns its total energy from the calculation
215 1d8c374e Carles Martí
    and assigns the conf_X label to track its origin. Finally it returns the
216 1d8c374e Carles Martí
    ase.Atoms object.
217 1d8c374e Carles Martí

218 1d8c374e Carles Martí
    @param dir_list: List of directories where to read the coords from.
219 1d8c374e Carles Martí
    @param code: the code that produced the calculation results files.
220 1d8c374e Carles Martí
    @param run_type: the type of calculation (and also the name of the folder)
221 1d8c374e Carles Martí
        containing the calculation subdirectories.
222 1d8c374e Carles Martí
    @param spec_atms: List of tuples containing pairs of new/traditional
223 1d8c374e Carles Martí
        chemical symbols.
224 1d8c374e Carles Martí
    @return: list of ase.Atoms objects.
225 f8c4eafe Carles
    """
226 fd2384fc Carles Marti
    from glob import glob
227 1d8c374e Carles Martí
    import os
228 1d8c374e Carles Martí
    from modules.utilities import is_binary
229 1d8c374e Carles Martí
    atoms_list = []
230 1d8c374e Carles Martí
    for conf_dir in dir_list:
231 1d8c374e Carles Martí
        conf_path = f"{run_type}/{conf_dir}/"
232 ed01b5b7 Carles Marti
        if code == 'cp2k':
233 1d8c374e Carles Martí
            ase_atms = read_coords_cp2k(glob(f"{conf_path}/*-1.restart")[0],
234 1d8c374e Carles Martí
                                        spec_atms)
235 1d8c374e Carles Martí
            # Assign energy
236 1d8c374e Carles Martí
            for fil in os.listdir(conf_path):
237 1d8c374e Carles Martí
                if is_binary(conf_path+fil):
238 1d8c374e Carles Martí
                    continue
239 1d8c374e Carles Martí
                conf_energy = read_energy_cp2k(conf_path+fil)
240 1d8c374e Carles Martí
                if conf_energy is not None:
241 1d8c374e Carles Martí
                    ase_atms.info["energy"] = conf_energy
242 1d8c374e Carles Martí
                    break
243 1d8c374e Carles Martí
            ase_atms.info[run_type[:3]] = conf_dir
244 1d8c374e Carles Martí
            atoms_list.append(ase_atms)
245 1d8c374e Carles Martí
        elif code == 'vasp':
246 1d8c374e Carles Martí
            ase_atms = read_coords_vasp(f"{conf_path}/OUTCAR", spec_atms)
247 1d8c374e Carles Martí
            ase_atms.info["energy"] = ase_atms.get_total_energy() * 27.2113845
248 1d8c374e Carles Martí
            ase_atms.info[run_type[:3]] = conf_dir
249 1d8c374e Carles Martí
            atoms_list.append(ase_atms)
250 cdc1edbe Carles Martí
        else:
251 1d8c374e Carles Martí
            err_msg = f"Collect coordinates not implemented for '{code}'."
252 cdc1edbe Carles Martí
            logger.error(err_msg)
253 cdc1edbe Carles Martí
            raise NotImplementedError(err_msg)
254 1d8c374e Carles Martí
    return atoms_list