Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / formats.py @ e7d9c7e8

Historique | Voir | Annoter | Télécharger (9,09 ko)

1 b6f47f2d Carles
"""Module for the conversion between atomic coordinates files and objects
2 e23f119b Carles

3 e23f119b Carles
functions:
4 f3004731 Carles
confs_to_mol_list: Converts the conformers inside a rdkit mol object to a list
5 f3004731 Carles
    of separate mol objects.
6 f3004731 Carles
rdkit_mol_to_ase_atoms: Converts a rdkit mol object into ase Atoms object.
7 b6f47f2d Carles
adapt_format: Converts the coordinate files into a required library object type.
8 b6f47f2d Carles
read_coords: Reads the atomic coordinates resulting from finished calculations.
9 e23f119b Carles
"""
10 e23f119b Carles
11 e23f119b Carles
import logging
12 f3004731 Carles
13 f3004731 Carles
import rdkit.Chem.AllChem as Chem
14 f3004731 Carles
15 89a980fc Carles
logger = logging.getLogger('DockOnSurf')
16 e23f119b Carles
17 e23f119b Carles
18 f3004731 Carles
def confs_to_mol_list(mol: Chem.rdchem.Mol, idx_lst=None):
19 f3004731 Carles
    """Converts the conformers inside a rdkit mol object to a list of
20 f3004731 Carles
    separate mol objects.
21 f3004731 Carles

22 f3004731 Carles
    @param mol: rdkit mol object containing at least one conformer.
23 9510666f Carles
    @param idx_lst: list of conformer indices to be considered. If not passed,
24 9510666f Carles
        all conformers are considered.
25 f3004731 Carles
    @return: list of separate mol objects.
26 f3004731 Carles
    """
27 f3004731 Carles
    if idx_lst is None:
28 f3004731 Carles
        idx_lst = list(range(mol.GetNumConformers()))
29 f3004731 Carles
    return [Chem.MolFromMolBlock(Chem.MolToMolBlock(mol, confId=int(idx)),
30 f3004731 Carles
                                 removeHs=False) for idx in idx_lst]
31 f3004731 Carles
32 f3004731 Carles
33 f3004731 Carles
def rdkit_mol_to_ase_atoms(mol: Chem.rdchem.Mol):
34 f3004731 Carles
    """Converts a rdkit mol object into ase Atoms object.
35 f3004731 Carles
    @param mol: rdkit mol object containing only one conformer.
36 f3004731 Carles
    @return ase.Atoms: ase Atoms object with the same coordinates.
37 f3004731 Carles
    """
38 f3004731 Carles
    from ase import Atoms
39 4933cb8a Carles Martí
    if mol.GetNumConformers() > 1:
40 9510666f Carles
        logger.warning('A mol object with multiple conformers is parsed, '
41 695dcff8 Carles Marti
                       'converting to Atoms only the first conformer.')
42 f3004731 Carles
    symbols = [atm.GetSymbol() for atm in mol.GetAtoms()]
43 f3004731 Carles
    positions = mol.GetConformer(0).GetPositions()
44 f3004731 Carles
    return Atoms(symbols=symbols, positions=positions)
45 f3004731 Carles
46 f3004731 Carles
47 cc92f6ee Carles Marti
def add_special_atoms(symbol_pairs):
48 cc92f6ee Carles Marti
    """Allows to use custom elements with symbols not in the periodic table.
49 cc92f6ee Carles Marti

50 cc92f6ee Carles Marti
    This function adds new chemical elements to be used by ase. Every new custom
51 cc92f6ee Carles Marti
    element must have a traditional (present in the periodic table) partner
52 cc92f6ee Carles Marti
    from which to obtain all its properties.
53 cc92f6ee Carles Marti
    @param symbol_pairs: List of tuples containing the pairs of chemical symbols.
54 cc92f6ee Carles Marti
        Every tuple contains a pair of chemical symbols, the first label must be
55 cc92f6ee Carles Marti
        the label of the custom element and the second one the symbol of the
56 cc92f6ee Carles Marti
        reference one (traditional present on the periodic table).
57 cc92f6ee Carles Marti
    @return:
58 cc92f6ee Carles Marti
    """
59 cc92f6ee Carles Marti
    import numpy as np
60 cc92f6ee Carles Marti
    from ase import data
61 cc92f6ee Carles Marti
    for i, pair in enumerate(symbol_pairs):
62 cc92f6ee Carles Marti
        data.chemical_symbols += [pair[0]]
63 cc92f6ee Carles Marti
        z_orig = data.atomic_numbers[pair[1]]
64 cc92f6ee Carles Marti
        orig_iupac_mass = data.atomic_masses_iupac2016[z_orig]
65 cc92f6ee Carles Marti
        orig_com_mass = data.atomic_masses_common[z_orig]
66 cc92f6ee Carles Marti
        data.atomic_numbers[pair[0]] = max(data.atomic_numbers.values()) + 1
67 cc92f6ee Carles Marti
        data.atomic_names += [pair[0]]
68 cc92f6ee Carles Marti
        data.atomic_masses_iupac2016 = np.append(data.atomic_masses_iupac2016,
69 cc92f6ee Carles Marti
                                                 orig_iupac_mass)
70 cc92f6ee Carles Marti
        data.atomic_masses = data.atomic_masses_iupac2016
71 cc92f6ee Carles Marti
        data.atomic_masses_common = np.append(data.atomic_masses_common,
72 cc92f6ee Carles Marti
                                              orig_com_mass)
73 cc92f6ee Carles Marti
        data.covalent_radii = np.append(data.covalent_radii,
74 cc92f6ee Carles Marti
                                        data.covalent_radii[z_orig])
75 cc92f6ee Carles Marti
        data.reference_states += [data.reference_states[z_orig]]
76 90819cc3 Carles Marti
        # TODO Add vdw_radii, gsmm and aml (smaller length)
77 cc92f6ee Carles Marti
78 cc92f6ee Carles Marti
79 90819cc3 Carles Marti
def adapt_format(requirement, coord_file, spec_atms=tuple()):
80 e23f119b Carles
    """Converts the coordinate files into a required library object type.
81 e23f119b Carles

82 e23f119b Carles
    Depending on the library required to use and the file type, it converts the
83 e23f119b Carles
    coordinate file into a library-workable object.
84 e23f119b Carles
    @param requirement: str, the library for which the conversion should be
85 e23f119b Carles
    made. Accepted values: 'ase', 'rdkit'.
86 e23f119b Carles
    @param coord_file: str, path to the coordinates file aiming to convert.
87 e23f119b Carles
    Accepted file tyoes: 'xyz', 'mol'.
88 90819cc3 Carles Marti
    @param spec_atms: List of tuples containing pairs of new/traditional
89 90819cc3 Carles Marti
        chemical symbols.
90 e23f119b Carles
    @return: an object the required library can work with.
91 e23f119b Carles
    """
92 439ce5f7 Carles
    import ase.io
93 8ab593ee Carles
    from ase.io.formats import filetype
94 8ab593ee Carles
95 8ab593ee Carles
    req_vals = ['rdkit', 'ase']
96 8ab593ee Carles
    file_type_vals = ['xyz', 'mol']
97 4381145e Carles
    lib_err = f"The conversion to the '{requirement}' library object type" \
98 4381145e Carles
              f" has not yet been implemented"
99 4381145e Carles
    conv_info = f"Converted {coord_file} to {requirement} object type"
100 4381145e Carles
101 f3004731 Carles
    fil_type_err = f'The {filetype(coord_file)} file formnat is not supported'
102 4381145e Carles
103 4381145e Carles
    if requirement not in req_vals:
104 9f7bb440 Carles
        logger.error(lib_err)
105 4381145e Carles
        raise NotImplementedError(lib_err)
106 4381145e Carles
107 4381145e Carles
    if filetype(coord_file) not in file_type_vals:
108 9f7bb440 Carles
        logger.error(fil_type_err)
109 4381145e Carles
        raise NotImplementedError(fil_type_err)
110 8ab593ee Carles
111 8ab593ee Carles
    if requirement == 'rdkit':
112 8ab593ee Carles
        if filetype(coord_file) == 'xyz':
113 af3e2441 Carles Marti
            from modules.xyz2mol import xyz2mol
114 439ce5f7 Carles
            ase_atms = ase.io.read(coord_file)
115 439ce5f7 Carles
            atomic_nums = ase_atms.get_atomic_numbers().tolist()
116 439ce5f7 Carles
            xyz_coordinates = ase_atms.positions.tolist()
117 b6f47f2d Carles
            rd_mol_obj = xyz2mol(atomic_nums, xyz_coordinates, charge=0)
118 8d08beb4 Carles
            logger.debug(conv_info)
119 21e2cca5 Carles Marti
            return Chem.AddHs(rd_mol_obj)
120 8ab593ee Carles
        elif filetype(coord_file) == 'mol':
121 8d08beb4 Carles
            logger.debug(conv_info)
122 21e2cca5 Carles Marti
            return Chem.AddHs(Chem.MolFromMolFile(coord_file, removeHs=False))
123 8ab593ee Carles
124 8ab593ee Carles
    if requirement == 'ase':
125 cc92f6ee Carles Marti
        add_special_atoms(spec_atms)
126 21e2cca5 Carles Marti
        if filetype(coord_file) == 'xyz':
127 21e2cca5 Carles Marti
            logger.debug(conv_info)
128 21e2cca5 Carles Marti
            return ase.io.read(coord_file)
129 21e2cca5 Carles Marti
        elif filetype(coord_file) == 'mol':
130 21e2cca5 Carles Marti
            logger.debug(conv_info)
131 21e2cca5 Carles Marti
            rd_mol = Chem.AddHs(Chem.MolFromMolFile(coord_file, removeHs=False))
132 21e2cca5 Carles Marti
            return rdkit_mol_to_ase_atoms(rd_mol)
133 b6f47f2d Carles
134 b6f47f2d Carles
135 ed01b5b7 Carles Marti
def read_coords_cp2k(file, spec_atoms=tuple()):
136 ed01b5b7 Carles Marti
    """Reads the coordinates from a CP2K restart file and returns an ase.Atoms
137 ed01b5b7 Carles Marti
     object.
138 ed01b5b7 Carles Marti

139 ed01b5b7 Carles Marti
    @param file: The file to read containing the coordinates.
140 ed01b5b7 Carles Marti
    @param spec_atoms: List of tuples containing the pairs of chemical symbols.
141 ed01b5b7 Carles Marti
    @return: ase.Atoms object of the coordinates in the file.
142 ed01b5b7 Carles Marti
    """
143 ed01b5b7 Carles Marti
    import numpy as np
144 ed01b5b7 Carles Marti
    from ase import Atoms
145 ed01b5b7 Carles Marti
    from pycp2k import CP2K
146 ed01b5b7 Carles Marti
147 ed01b5b7 Carles Marti
    cp2k = CP2K()
148 ed01b5b7 Carles Marti
    cp2k.parse(file)
149 ed01b5b7 Carles Marti
    force_eval = cp2k.CP2K_INPUT.FORCE_EVAL_list[0]
150 ed01b5b7 Carles Marti
    raw_coords = force_eval.SUBSYS.COORD.Default_keyword
151 ed01b5b7 Carles Marti
    symbols = [atom.split()[0] for atom in raw_coords]
152 ed01b5b7 Carles Marti
    positions = np.array([atom.split()[1:] for atom in raw_coords])
153 ed01b5b7 Carles Marti
    if len(spec_atoms) > 0:
154 ed01b5b7 Carles Marti
        add_special_atoms(spec_atoms)  # TODO check usage
155 ed01b5b7 Carles Marti
    return Atoms(symbols=symbols, positions=positions)
156 ed01b5b7 Carles Marti
157 ed01b5b7 Carles Marti
158 ed01b5b7 Carles Marti
def read_coords(conf_list, code, run_type, spec_atms=tuple()):
159 b6f47f2d Carles
    """Reads the atomic coordinates resulting from finished calculations.
160 b6f47f2d Carles

161 b6f47f2d Carles
    Given a run_type ('isolated', 'screening' or 'refinement') directory
162 b6f47f2d Carles
    containing different subdirectories with finished calculations in every
163 f8c4eafe Carles
    subdirectory, it reads, from each subirectory, the final coordinates
164 f8c4eafe Carles
    resulting from the calculation and returns a list of objects adequate to the
165 f8c4eafe Carles
    required library.
166 b6f47f2d Carles

167 ed01b5b7 Carles Marti
    @param conf_list: List of directories where to read the coords.
168 f8c4eafe Carles
    @param code: the code that produced the calculation results files.
169 b6f47f2d Carles
    @param run_type: the type of calculation (and also the name of the folder)
170 b6f47f2d Carles
                     containing the calculation subdirectories.
171 f8c4eafe Carles
    @param req: The required library object type to make the list of (eg. rdkit,
172 f8c4eafe Carles
                ase)
173 90819cc3 Carles Marti
    @param spec_atms: List of tuples containing pairs of new/traditional
174 90819cc3 Carles Marti
        chemical symbols.
175 b6f47f2d Carles
    @return: list of collection-of-atoms objects. (rdkit.Mol, ase.Atoms, etc.)
176 b6f47f2d Carles
    """
177 e7d9c7e8 Carles Marti
    from glob import glob
178 c6e71e46 Carles Marti
    atoms_list = []
179 ed01b5b7 Carles Marti
    for conf in conf_list:
180 ed01b5b7 Carles Marti
        if code == 'cp2k':
181 e7d9c7e8 Carles Marti
            read_coords_cp2k(glob(f"{run_type}/{conf}/*-1.restart")[0],
182 e7d9c7e8 Carles Marti
                             spec_atms)
183 ed01b5b7 Carles Marti
184 c6e71e46 Carles Marti
    return atoms_list
185 f8c4eafe Carles
186 f8c4eafe Carles
187 ed01b5b7 Carles Marti
def read_energy_cp2k(file):
188 ed01b5b7 Carles Marti
    """Reads the energies of a CP2K out file and returns its final energy.
189 ed01b5b7 Carles Marti

190 ed01b5b7 Carles Marti
    @param file: The file from which the energy should be read.
191 ed01b5b7 Carles Marti
    @return: The last energy on the out file.
192 ed01b5b7 Carles Marti
    """
193 ed01b5b7 Carles Marti
    out_fh = open(file, 'r')
194 ed01b5b7 Carles Marti
    energy = None
195 ed01b5b7 Carles Marti
    for line in out_fh:
196 ed01b5b7 Carles Marti
        if "ENERGY| Total FORCE_EVAL ( QS ) energy (a.u.):" in line:
197 ed01b5b7 Carles Marti
            energy = float(line.strip().split(':')[1])
198 ed01b5b7 Carles Marti
    out_fh.close()
199 ed01b5b7 Carles Marti
    return energy
200 ed01b5b7 Carles Marti
201 ed01b5b7 Carles Marti
202 ed01b5b7 Carles Marti
def read_energies(conf_list, code, run_type):
203 f8c4eafe Carles
    """Reads the energies resulting from finished calculations.
204 f8c4eafe Carles

205 f8c4eafe Carles
    Given a run_type ('isolated', 'screening' or 'refinement') directory
206 f8c4eafe Carles
    containing different subdirectories with finished calculations in every
207 f8c4eafe Carles
    subdirectory, it reads the final energies of calculations inside each
208 f8c4eafe Carles
    subdirectory.
209 f8c4eafe Carles

210 ed01b5b7 Carles Marti
    @param conf_list: List of directories where to read the energy.
211 f8c4eafe Carles
    @param code: the code that produced the calculation results files.
212 f8c4eafe Carles
    @param run_type: the type of calculation (and also the name of the folder)
213 f8c4eafe Carles
                     containing the calculation subdirectories.
214 f8c4eafe Carles
    @return: list of energies
215 f8c4eafe Carles
    """
216 f8c4eafe Carles
    import numpy as np
217 f8c4eafe Carles
218 f8c4eafe Carles
    energies = []
219 ed01b5b7 Carles Marti
    for conf in conf_list:
220 ed01b5b7 Carles Marti
        if code == 'cp2k':
221 d5c9605d Carles Marti
            energies.append(read_energy_cp2k(conf))
222 ed01b5b7 Carles Marti
223 ed01b5b7 Carles Marti
    if len(energies) == 0:
224 ed01b5b7 Carles Marti
        err = f"No results found on {run_type}"
225 ed01b5b7 Carles Marti
        logger.error(err)
226 ed01b5b7 Carles Marti
        raise FileNotFoundError(err)
227 f8c4eafe Carles
228 f8c4eafe Carles
    return np.array(energies)