Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / formats.py @ cf980c86

Historique | Voir | Annoter | Télécharger (10,57 ko)

1 4e82c425 Carles Martí
"""Module for the conversion between different kinds of atomic data.
2 e23f119b Carles

3 e23f119b Carles
functions:
4 fd2384fc Carles Marti
confs_to_mol_list: Converts the conformers inside a rdkit.Mol object to a list
5 fd2384fc Carles Marti
    of separate rdkit.Mol objects.
6 fd2384fc Carles Marti
rdkit_mol_to_ase_atoms: Converts a rdkit.Mol object into ase.Atoms object.
7 fd2384fc Carles Marti
add_special_atoms: Allows ase to use custom elements with symbols not in the
8 fd2384fc Carles Marti
    periodic table.
9 b6f47f2d Carles
adapt_format: Converts the coordinate files into a required library object type.
10 fd2384fc Carles Marti
read_coords_cp2k: Reads the coordinates from a CP2K restart file and returns an
11 fd2384fc Carles Marti
    ase.Atoms object.
12 4e82c425 Carles Martí
read_coords_vasp: Reads the coordinates from VASP OUTCAR file and returns an
13 4e82c425 Carles Martí
    ase.Atoms object.
14 fd2384fc Carles Marti
read_energy_cp2k: Reads the CP2K out file and returns its final energy.
15 4e82c425 Carles Martí
collect_confs: Reads the coordinates and energies of a list of finished
16 4e82c425 Carles Martí
    calculations.
17 e23f119b Carles
"""
18 e23f119b Carles
19 e23f119b Carles
import logging
20 f3004731 Carles
21 f3004731 Carles
import rdkit.Chem.AllChem as Chem
22 f3004731 Carles
23 89a980fc Carles
logger = logging.getLogger('DockOnSurf')
24 e23f119b Carles
25 e23f119b Carles
26 f3004731 Carles
def confs_to_mol_list(mol: Chem.rdchem.Mol, idx_lst=None):
27 f3004731 Carles
    """Converts the conformers inside a rdkit mol object to a list of
28 f3004731 Carles
    separate mol objects.
29 f3004731 Carles

30 f3004731 Carles
    @param mol: rdkit mol object containing at least one conformer.
31 9510666f Carles
    @param idx_lst: list of conformer indices to be considered. If not passed,
32 9510666f Carles
        all conformers are considered.
33 f3004731 Carles
    @return: list of separate mol objects.
34 f3004731 Carles
    """
35 f3004731 Carles
    if idx_lst is None:
36 f3004731 Carles
        idx_lst = list(range(mol.GetNumConformers()))
37 f02e613b Carles Marti
    return [Chem.MolFromMolBlock(
38 f02e613b Carles Marti
        Chem.MolToMolBlock(mol, confId=int(idx)).replace("3D", ""),
39 f02e613b Carles Marti
        removeHs=False) for idx in idx_lst]
40 f3004731 Carles
41 f3004731 Carles
42 f3004731 Carles
def rdkit_mol_to_ase_atoms(mol: Chem.rdchem.Mol):
43 f3004731 Carles
    """Converts a rdkit mol object into ase Atoms object.
44 f3004731 Carles
    @param mol: rdkit mol object containing only one conformer.
45 f3004731 Carles
    @return ase.Atoms: ase Atoms object with the same coordinates.
46 f3004731 Carles
    """
47 f3004731 Carles
    from ase import Atoms
48 4933cb8a Carles Martí
    if mol.GetNumConformers() > 1:
49 9510666f Carles
        logger.warning('A mol object with multiple conformers is parsed, '
50 695dcff8 Carles Marti
                       'converting to Atoms only the first conformer.')
51 f3004731 Carles
    symbols = [atm.GetSymbol() for atm in mol.GetAtoms()]
52 f3004731 Carles
    positions = mol.GetConformer(0).GetPositions()
53 f3004731 Carles
    return Atoms(symbols=symbols, positions=positions)
54 f3004731 Carles
55 f3004731 Carles
56 cc92f6ee Carles Marti
def add_special_atoms(symbol_pairs):
57 fd2384fc Carles Marti
    """Allows ase to use custom elements with symbols not in the periodic table.
58 cc92f6ee Carles Marti

59 cc92f6ee Carles Marti
    This function adds new chemical elements to be used by ase. Every new custom
60 cc92f6ee Carles Marti
    element must have a traditional (present in the periodic table) partner
61 cc92f6ee Carles Marti
    from which to obtain all its properties.
62 fd2384fc Carles Marti
    @param symbol_pairs: List of tuples containing pairs of chemical symbols.
63 cc92f6ee Carles Marti
        Every tuple contains a pair of chemical symbols, the first label must be
64 cc92f6ee Carles Marti
        the label of the custom element and the second one the symbol of the
65 cc92f6ee Carles Marti
        reference one (traditional present on the periodic table).
66 cc92f6ee Carles Marti
    @return:
67 587dca22 Carles Marti
    """  # TODO Enable special atoms for rdkit
68 cc92f6ee Carles Marti
    import numpy as np
69 cc92f6ee Carles Marti
    from ase import data
70 cc92f6ee Carles Marti
    for i, pair in enumerate(symbol_pairs):
71 cc92f6ee Carles Marti
        data.chemical_symbols += [pair[0]]
72 cc92f6ee Carles Marti
        z_orig = data.atomic_numbers[pair[1]]
73 cc92f6ee Carles Marti
        orig_iupac_mass = data.atomic_masses_iupac2016[z_orig]
74 cc92f6ee Carles Marti
        orig_com_mass = data.atomic_masses_common[z_orig]
75 cc92f6ee Carles Marti
        data.atomic_numbers[pair[0]] = max(data.atomic_numbers.values()) + 1
76 cc92f6ee Carles Marti
        data.atomic_names += [pair[0]]
77 cc92f6ee Carles Marti
        data.atomic_masses_iupac2016 = np.append(data.atomic_masses_iupac2016,
78 cc92f6ee Carles Marti
                                                 orig_iupac_mass)
79 cc92f6ee Carles Marti
        data.atomic_masses = data.atomic_masses_iupac2016
80 cc92f6ee Carles Marti
        data.atomic_masses_common = np.append(data.atomic_masses_common,
81 cc92f6ee Carles Marti
                                              orig_com_mass)
82 cc92f6ee Carles Marti
        data.covalent_radii = np.append(data.covalent_radii,
83 cc92f6ee Carles Marti
                                        data.covalent_radii[z_orig])
84 cc92f6ee Carles Marti
        data.reference_states += [data.reference_states[z_orig]]
85 90819cc3 Carles Marti
        # TODO Add vdw_radii, gsmm and aml (smaller length)
86 cc92f6ee Carles Marti
87 cc92f6ee Carles Marti
88 90819cc3 Carles Marti
def adapt_format(requirement, coord_file, spec_atms=tuple()):
89 e23f119b Carles
    """Converts the coordinate files into a required library object type.
90 e23f119b Carles

91 e23f119b Carles
    Depending on the library required to use and the file type, it converts the
92 e23f119b Carles
    coordinate file into a library-workable object.
93 e23f119b Carles
    @param requirement: str, the library for which the conversion should be
94 4e82c425 Carles Martí
        made. Accepted values: 'ase', 'rdkit'.
95 e23f119b Carles
    @param coord_file: str, path to the coordinates file aiming to convert.
96 4e82c425 Carles Martí
        Accepted file formats: all file formats readable by ase.
97 90819cc3 Carles Marti
    @param spec_atms: List of tuples containing pairs of new/traditional
98 90819cc3 Carles Marti
        chemical symbols.
99 e23f119b Carles
    @return: an object the required library can work with.
100 748a6036 Carles Martí
    """
101 439ce5f7 Carles
    import ase.io
102 8ab593ee Carles
    from ase.io.formats import filetype
103 8ab593ee Carles
104 748a6036 Carles Martí
    from modules.utilities import try_command
105 748a6036 Carles Martí
106 8ab593ee Carles
    req_vals = ['rdkit', 'ase']
107 4381145e Carles
    lib_err = f"The conversion to the '{requirement}' library object type" \
108 4381145e Carles
              f" has not yet been implemented"
109 4381145e Carles
    conv_info = f"Converted {coord_file} to {requirement} object type"
110 4381145e Carles
111 748a6036 Carles Martí
    fil_type_err = f'The {filetype(coord_file)} file format is not supported'
112 4381145e Carles
113 4381145e Carles
    if requirement not in req_vals:
114 9f7bb440 Carles
        logger.error(lib_err)
115 4381145e Carles
        raise NotImplementedError(lib_err)
116 4381145e Carles
117 8ab593ee Carles
    if requirement == 'rdkit':
118 748a6036 Carles Martí
        from modules.xyz2mol import xyz2mol
119 748a6036 Carles Martí
        if filetype(coord_file) == 'xyz':  # TODO Include detection of charges.
120 439ce5f7 Carles
            ase_atms = ase.io.read(coord_file)
121 439ce5f7 Carles
            atomic_nums = ase_atms.get_atomic_numbers().tolist()
122 439ce5f7 Carles
            xyz_coordinates = ase_atms.positions.tolist()
123 748a6036 Carles Martí
            rd_mol_obj = xyz2mol(atomic_nums, xyz_coordinates)
124 8d08beb4 Carles
            logger.debug(conv_info)
125 21e2cca5 Carles Marti
            return Chem.AddHs(rd_mol_obj)
126 8ab593ee Carles
        elif filetype(coord_file) == 'mol':
127 8d08beb4 Carles
            logger.debug(conv_info)
128 21e2cca5 Carles Marti
            return Chem.AddHs(Chem.MolFromMolFile(coord_file, removeHs=False))
129 748a6036 Carles Martí
        else:
130 748a6036 Carles Martí
            ase_atms = try_command(ase.io.read,
131 748a6036 Carles Martí
                                   [(ase.io.formats.UnknownFileTypeError,
132 748a6036 Carles Martí
                                     fil_type_err)],
133 748a6036 Carles Martí
                                   coord_file)
134 748a6036 Carles Martí
            atomic_nums = ase_atms.get_atomic_numbers().tolist()
135 748a6036 Carles Martí
            xyz_coordinates = ase_atms.positions.tolist()
136 748a6036 Carles Martí
            return xyz2mol(atomic_nums, xyz_coordinates)
137 8ab593ee Carles
138 8ab593ee Carles
    if requirement == 'ase':
139 cc92f6ee Carles Marti
        add_special_atoms(spec_atms)
140 21e2cca5 Carles Marti
        if filetype(coord_file) == 'xyz':
141 21e2cca5 Carles Marti
            logger.debug(conv_info)
142 21e2cca5 Carles Marti
            return ase.io.read(coord_file)
143 21e2cca5 Carles Marti
        elif filetype(coord_file) == 'mol':
144 21e2cca5 Carles Marti
            logger.debug(conv_info)
145 21e2cca5 Carles Marti
            rd_mol = Chem.AddHs(Chem.MolFromMolFile(coord_file, removeHs=False))
146 21e2cca5 Carles Marti
            return rdkit_mol_to_ase_atoms(rd_mol)
147 748a6036 Carles Martí
        else:
148 748a6036 Carles Martí
            return try_command(ase.io.read,
149 748a6036 Carles Martí
                               [(ase.io.formats.UnknownFileTypeError,
150 748a6036 Carles Martí
                                 fil_type_err)],
151 748a6036 Carles Martí
                               coord_file)
152 b6f47f2d Carles
153 b6f47f2d Carles
154 ed01b5b7 Carles Marti
def read_coords_cp2k(file, spec_atoms=tuple()):
155 ed01b5b7 Carles Marti
    """Reads the coordinates from a CP2K restart file and returns an ase.Atoms
156 ed01b5b7 Carles Marti
     object.
157 ed01b5b7 Carles Marti

158 ed01b5b7 Carles Marti
    @param file: The file to read containing the coordinates.
159 ed01b5b7 Carles Marti
    @param spec_atoms: List of tuples containing the pairs of chemical symbols.
160 ed01b5b7 Carles Marti
    @return: ase.Atoms object of the coordinates in the file.
161 ed01b5b7 Carles Marti
    """
162 ed01b5b7 Carles Marti
    import numpy as np
163 ed01b5b7 Carles Marti
    from ase import Atoms
164 ed01b5b7 Carles Marti
    from pycp2k import CP2K
165 ed01b5b7 Carles Marti
166 ed01b5b7 Carles Marti
    cp2k = CP2K()
167 ed01b5b7 Carles Marti
    cp2k.parse(file)
168 ed01b5b7 Carles Marti
    force_eval = cp2k.CP2K_INPUT.FORCE_EVAL_list[0]
169 ed01b5b7 Carles Marti
    raw_coords = force_eval.SUBSYS.COORD.Default_keyword
170 ed01b5b7 Carles Marti
    symbols = [atom.split()[0] for atom in raw_coords]
171 1d8c374e Carles Martí
    positions = np.array([[float(coord) for coord in atom.split()[1:]]
172 1d8c374e Carles Martí
                          for atom in raw_coords])
173 ed01b5b7 Carles Marti
    if len(spec_atoms) > 0:
174 4387ce69 Carles Marti
        add_special_atoms(spec_atoms)
175 ed01b5b7 Carles Marti
    return Atoms(symbols=symbols, positions=positions)
176 ed01b5b7 Carles Marti
177 ed01b5b7 Carles Marti
178 cdc1edbe Carles Martí
def read_coords_vasp(file, spec_atoms=tuple()):
179 cdc1edbe Carles Martí
    """Reads the coordinates from VASP OUTCAR and returns an ase.Atoms object.
180 cdc1edbe Carles Martí

181 cdc1edbe Carles Martí
    @param file: The file to read containing the coordinates.
182 cdc1edbe Carles Martí
    @param spec_atoms: List of tuples containing the pairs of chemical symbols.
183 cdc1edbe Carles Martí
    @return: ase.Atoms object of the coordinates in the file.
184 cdc1edbe Carles Martí
    """
185 cdc1edbe Carles Martí
    import os
186 cdc1edbe Carles Martí
    import ase.io
187 cdc1edbe Carles Martí
    if not os.path.isfile(file):
188 cdc1edbe Carles Martí
        err_msg = f"File {file} not found."
189 cdc1edbe Carles Martí
        logger.error(err_msg)
190 cdc1edbe Carles Martí
        raise FileNotFoundError(err_msg)
191 cdc1edbe Carles Martí
    if len(spec_atoms) > 0:
192 cdc1edbe Carles Martí
        add_special_atoms(spec_atoms)
193 cdc1edbe Carles Martí
    return ase.io.read(file)
194 cdc1edbe Carles Martí
195 cdc1edbe Carles Martí
196 ed01b5b7 Carles Marti
def read_energy_cp2k(file):
197 1d8c374e Carles Martí
    """Reads the CP2K output file and returns its final energy.
198 ed01b5b7 Carles Marti

199 ed01b5b7 Carles Marti
    @param file: The file from which the energy should be read.
200 1d8c374e Carles Martí
    @return: The last energy on the out file in eV.
201 ed01b5b7 Carles Marti
    """
202 ed01b5b7 Carles Marti
    out_fh = open(file, 'r')
203 ed01b5b7 Carles Marti
    energy = None
204 ed01b5b7 Carles Marti
    for line in out_fh:
205 ed01b5b7 Carles Marti
        if "ENERGY| Total FORCE_EVAL ( QS ) energy (a.u.):" in line:
206 9371fd91 Carles Marti
            energy = float(line.strip().split(':')[1]) * 27.2113845  # Ha to eV
207 ed01b5b7 Carles Marti
    out_fh.close()
208 ed01b5b7 Carles Marti
    return energy
209 ed01b5b7 Carles Marti
210 ed01b5b7 Carles Marti
211 1d8c374e Carles Martí
def collect_confs(dir_list, code, run_type, spec_atms=tuple()):
212 1d8c374e Carles Martí
    """Reads the coordinates and energies of a list of finished calculations.
213 f8c4eafe Carles

214 fd2384fc Carles Marti
    Given a dockonsurf directory hierarchy: project/run_type/conf_X
215 1d8c374e Carles Martí
    (run_type = ['isolated', 'screening' or 'refinement']) it reads the
216 1d8c374e Carles Martí
    coordinates of each conf_X, it assigns its total energy from the calculation
217 1d8c374e Carles Martí
    and assigns the conf_X label to track its origin. Finally it returns the
218 1d8c374e Carles Martí
    ase.Atoms object.
219 1d8c374e Carles Martí

220 1d8c374e Carles Martí
    @param dir_list: List of directories where to read the coords from.
221 1d8c374e Carles Martí
    @param code: the code that produced the calculation results files.
222 1d8c374e Carles Martí
    @param run_type: the type of calculation (and also the name of the folder)
223 1d8c374e Carles Martí
        containing the calculation subdirectories.
224 1d8c374e Carles Martí
    @param spec_atms: List of tuples containing pairs of new/traditional
225 1d8c374e Carles Martí
        chemical symbols.
226 1d8c374e Carles Martí
    @return: list of ase.Atoms objects.
227 f8c4eafe Carles
    """
228 fd2384fc Carles Marti
    from glob import glob
229 1d8c374e Carles Martí
    import os
230 1d8c374e Carles Martí
    from modules.utilities import is_binary
231 1d8c374e Carles Martí
    atoms_list = []
232 1d8c374e Carles Martí
    for conf_dir in dir_list:
233 1d8c374e Carles Martí
        conf_path = f"{run_type}/{conf_dir}/"
234 ed01b5b7 Carles Marti
        if code == 'cp2k':
235 1d8c374e Carles Martí
            ase_atms = read_coords_cp2k(glob(f"{conf_path}/*-1.restart")[0],
236 1d8c374e Carles Martí
                                        spec_atms)
237 1d8c374e Carles Martí
            # Assign energy
238 1d8c374e Carles Martí
            for fil in os.listdir(conf_path):
239 4e82c425 Carles Martí
                if is_binary(conf_path + fil):
240 1d8c374e Carles Martí
                    continue
241 4e82c425 Carles Martí
                conf_energy = read_energy_cp2k(conf_path + fil)
242 1d8c374e Carles Martí
                if conf_energy is not None:
243 1d8c374e Carles Martí
                    ase_atms.info["energy"] = conf_energy
244 1d8c374e Carles Martí
                    break
245 1d8c374e Carles Martí
            ase_atms.info[run_type[:3]] = conf_dir
246 1d8c374e Carles Martí
            atoms_list.append(ase_atms)
247 1d8c374e Carles Martí
        elif code == 'vasp':
248 1d8c374e Carles Martí
            ase_atms = read_coords_vasp(f"{conf_path}/OUTCAR", spec_atms)
249 1d8c374e Carles Martí
            ase_atms.info["energy"] = ase_atms.get_total_energy() * 27.2113845
250 1d8c374e Carles Martí
            ase_atms.info[run_type[:3]] = conf_dir
251 1d8c374e Carles Martí
            atoms_list.append(ase_atms)
252 cdc1edbe Carles Martí
        else:
253 1d8c374e Carles Martí
            err_msg = f"Collect coordinates not implemented for '{code}'."
254 cdc1edbe Carles Martí
            logger.error(err_msg)
255 cdc1edbe Carles Martí
            raise NotImplementedError(err_msg)
256 1d8c374e Carles Martí
    return atoms_list