Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / formats.py @ 0dfa15ff

Historique | Voir | Annoter | Télécharger (6,22 ko)

1 b6f47f2d Carles
"""Module for the conversion between atomic coordinates files and objects
2 e23f119b Carles

3 e23f119b Carles
functions:
4 f3004731 Carles
confs_to_mol_list: Converts the conformers inside a rdkit mol object to a list
5 f3004731 Carles
    of separate mol objects.
6 f3004731 Carles
rdkit_mol_to_ase_atoms: Converts a rdkit mol object into ase Atoms object.
7 b6f47f2d Carles
adapt_format: Converts the coordinate files into a required library object type.
8 b6f47f2d Carles
read_coords: Reads the atomic coordinates resulting from finished calculations.
read_energies: Reads the energies resulting from finished calculations.
9 e23f119b Carles
"""
10 e23f119b Carles
11 e23f119b Carles
import logging
12 f3004731 Carles
13 f3004731 Carles
import rdkit.Chem.AllChem as Chem
14 f3004731 Carles
15 89a980fc Carles
logger = logging.getLogger('DockOnSurf')
16 e23f119b Carles
17 e23f119b Carles
18 f3004731 Carles
def confs_to_mol_list(mol: Chem.rdchem.Mol, idx_lst=None):
    """Converts the conformers inside a rdkit mol object to a list of
    separate mol objects.

    Every selected conformer is written to a mol block and parsed back into a
    new mol object, keeping the explicit hydrogens (removeHs=False).

    @param mol: rdkit mol object containing at least one conformer.
    @param idx_lst: list of conformer ids to be considered. If not passed,
        all conformers are considered.
    @return: list of separate mol objects, one per selected conformer.
    """
    if idx_lst is None:
        # Ask the molecule for its actual conformer ids: they are not
        # guaranteed to be 0..N-1 (e.g. after some conformers were removed).
        idx_lst = [conf.GetId() for conf in mol.GetConformers()]
    return [Chem.MolFromMolBlock(Chem.MolToMolBlock(mol, confId=int(idx)),
                                 removeHs=False)
            for idx in idx_lst]
31 f3004731 Carles
32 f3004731 Carles
33 f3004731 Carles
def rdkit_mol_to_ase_atoms(mol: Chem.rdchem.Mol):
    """Converts a rdkit mol object into ase Atoms object.

    Only the conformer with id 0 is converted; if the mol object holds more
    than one conformer a warning is logged and the others are ignored.

    @param mol: rdkit mol object containing only one conformer.
    @return ase.Atoms: ase Atoms object with the same coordinates.
    """
    # Imported locally so that ase is only required when this conversion is
    # actually used.
    from ase import Atoms

    if mol.GetNumConformers() > 1:
        logger.warning('A mol object with multiple conformers is parsed, '
                       'converting to Atoms only the first conformer')
    symbols = [atm.GetSymbol() for atm in mol.GetAtoms()]
    positions = mol.GetConformer(0).GetPositions()
    return Atoms(symbols=symbols, positions=positions)
45 f3004731 Carles
46 f3004731 Carles
47 83f022c9 Carles
def adapt_format(requirement, coord_file):
    """Converts the coordinate files into a required library object type.

    Depending on the library required to use and the file type, it converts the
    coordinate file into a library-workable object.

    @param requirement: str, the library for which the conversion should be
    made. Accepted values: 'ase', 'rdkit'.
    @param coord_file: str, path to the coordinates file aiming to convert.
    Accepted file types: 'xyz', 'mol'.
    @return: an object the required library can work with (a rdkit mol object
    with hydrogens added for 'rdkit', an ase Atoms object for 'ase').
    @raise NotImplementedError: if the requested library or the detected file
    type is not among the accepted values.
    """
    import ase.io
    from ase.io.formats import filetype

    req_vals = ['rdkit', 'ase']
    file_type_vals = ['xyz', 'mol']

    # Validate the requested library before touching the file at all.
    if requirement not in req_vals:
        lib_err = f"The conversion to the '{requirement}' library object type" \
                  f" has not yet been implemented"
        logger.error(lib_err)
        raise NotImplementedError(lib_err)

    # Detect the file type once and reuse it for every later decision.
    file_type = filetype(coord_file)
    if file_type not in file_type_vals:
        fil_type_err = f'The {file_type} file format is not supported'
        logger.error(fil_type_err)
        raise NotImplementedError(fil_type_err)

    conv_info = f"Converted {coord_file} to {requirement} object type"

    if requirement == 'rdkit':
        if file_type == 'xyz':
            from modules.xyz2mol import xyz2mol
            ase_atms = ase.io.read(coord_file)
            atomic_nums = ase_atms.get_atomic_numbers().tolist()
            xyz_coordinates = ase_atms.positions.tolist()
            # TODO Add routine to read charge
            rd_mol_obj = xyz2mol(atomic_nums, xyz_coordinates, charge=0)
        else:  # 'mol' is the only other accepted file type
            rd_mol_obj = Chem.MolFromMolFile(coord_file, removeHs=False)
        logger.debug(conv_info)
        return Chem.AddHs(rd_mol_obj)

    # From here on requirement == 'ase' (guaranteed by the check above).
    if file_type == 'xyz':
        ase_obj = ase.io.read(coord_file)
    else:  # 'mol': read it through rdkit and convert the result
        rd_mol = Chem.AddHs(Chem.MolFromMolFile(coord_file, removeHs=False))
        ase_obj = rdkit_mol_to_ase_atoms(rd_mol)
    logger.debug(conv_info)
    return ase_obj
99 b6f47f2d Carles
100 b6f47f2d Carles
101 b6f47f2d Carles
def read_coords(code, run_type, req):
    """Reads the atomic coordinates resulting from finished calculations.

    Given a run_type ('isolated', 'screening' or 'refinement') directory
    containing different subdirectories with finished calculations in every
    subdirectory, it reads, from each subdirectory, the final coordinates
    resulting from the calculation and returns a list of objects adequate to the
    required library.

    @param code: the code that produced the calculation results files.
    @param run_type: the type of calculation (and also the name of the folder)
                     containing the calculation subdirectories.
    @param req: The required library object type to make the list of (eg. rdkit,
                ase)
    @return: list of collection-of-atoms objects. (rdkit.Mol, ase.Atoms, etc.)
    """
    import os

    # Substring that identifies the coordinates file of each code. With an
    # empty pattern every file inside the subdirectories is selected.
    if code == 'cp2k':
        pattern = '-pos-1.xyz'
    else:
        pattern = ''

    coords = []
    for conf in os.listdir(run_type):
        conf_dir = f"{run_type}/{conf}"
        # Stray regular files next to the calculation subdirectories are
        # skipped instead of making os.listdir fail.
        if not os.path.isdir(conf_dir):
            continue
        for fil in os.listdir(conf_dir):
            if pattern in fil:
                coords.append(adapt_format(req, f'{run_type}/{conf}/{fil}'))
    return coords
125 f8c4eafe Carles
126 f8c4eafe Carles
127 f8c4eafe Carles
def read_energies(code, run_type):
    """Reads the energies resulting from finished calculations.

    Given a run_type ('isolated', 'screening' or 'refinement') directory
    containing different subdirectories with finished calculations in every
    subdirectory, it reads the final energies of calculations inside each
    subdirectory.

    Only 'cp2k' results are handled; for any other code an empty array is
    returned.

    @param code: the code that produced the calculation results files.
    @param run_type: the type of calculation (and also the name of the folder)
                     containing the calculation subdirectories.
    @return: numpy array with the energies found, in directory-listing order.
    """
    import os
    import numpy as np

    energies = []
    if code == 'cp2k':
        # Project-local helper, only needed to parse cp2k trajectories.
        from modules.utilities import tail

        pattern = '-pos-1.xyz'
        for conf in os.listdir(run_type):
            conf_dir = f"{run_type}/{conf}"
            # Stray regular files next to the calculation subdirectories are
            # skipped instead of making os.listdir fail.
            if not os.path.isdir(conf_dir):
                continue
            for fil in os.listdir(conf_dir):
                if pattern not in fil:
                    continue
                # The context manager closes the trajectory file even if the
                # parsing below raises.
                with open(f"{conf_dir}/{fil}", 'rb') as traj_fh:
                    # First line of the xyz trajectory: number of atoms.
                    num_atoms = int(traj_fh.readline().strip())
                    # Last frame = atom lines plus the two header lines.
                    # NOTE(review): assumes tail() returns decoded text (str),
                    # as the str comparisons below require - confirm.
                    last_geo = tail(traj_fh, num_atoms + 2).splitlines()
                for line in last_geo:
                    if 'E =' in line:
                        energies.append(float(line.split('E =')[1]))

    return np.array(energies)