Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / formats.py @ b6f47f2d

Historique | Voir | Annoter | Télécharger (4,67 ko)

1
"""Module for the conversion between atomic coordinates files and objects
2

3
functions:
4
confs_to_mol_list: Converts the conformers inside a rdkit mol object to a list
5
    of separate mol objects.
6
rdkit_mol_to_ase_atoms: Converts a rdkit mol object into ase Atoms object.
7
adapt_format: Converts the coordinate files into a required library object type.
8
read_coords: Reads the atomic coordinates resulting from finished calculations.
9
"""
10

    
11
import logging
12

    
13
import rdkit.Chem.AllChem as Chem
14

    
15
logger = logging.getLogger('DockOnSurf')
16

    
17

    
18
def confs_to_mol_list(mol: Chem.rdchem.Mol, idx_lst=None):
19
    """Converts the conformers inside a rdkit mol object to a list of
20
    separate mol objects.
21

22
    @param mol: rdkit mol object containing at least one conformer.
23
    @param idx_lst: list of conformer indices to be considered. If not passed,
24
        all conformers are considered.
25
    @return: list of separate mol objects.
26
    """
27
    if idx_lst is None:
28
        idx_lst = list(range(mol.GetNumConformers()))
29
    return [Chem.MolFromMolBlock(Chem.MolToMolBlock(mol, confId=int(idx)),
30
                                 removeHs=False) for idx in idx_lst]
31

    
32

    
33
def rdkit_mol_to_ase_atoms(mol: Chem.rdchem.Mol):
34
    """Converts a rdkit mol object into ase Atoms object.
35
    @param mol: rdkit mol object containing only one conformer.
36
    @return ase.Atoms: ase Atoms object with the same coordinates.
37
    """
38
    from ase import Atoms
39
    if mol.GetNumConformers() > 1:
40
        logger.warning('A mol object with multiple conformers is parsed, '
41
                       'converting to Atoms only the first conformer')
42
    symbols = [atm.GetSymbol() for atm in mol.GetAtoms()]
43
    positions = mol.GetConformer(0).GetPositions()
44
    return Atoms(symbols=symbols, positions=positions)
45

    
46

    
47
def adapt_format(requirement, coord_file):
48
    """Converts the coordinate files into a required library object type.
49

50
    Depending on the library required to use and the file type, it converts the
51
    coordinate file into a library-workable object.
52
    @param requirement: str, the library for which the conversion should be
53
    made. Accepted values: 'ase', 'rdkit'.
54
    @param coord_file: str, path to the coordinates file aiming to convert.
55
    Accepted file tyoes: 'xyz', 'mol'.
56
    @return: an object the required library can work with.
57
    """
58
    import ase.io
59
    from ase.io.formats import filetype
60

    
61
    req_vals = ['rdkit', 'ase']
62
    file_type_vals = ['xyz', 'mol']
63
    lib_err = f"The conversion to the '{requirement}' library object type" \
64
              f" has not yet been implemented"
65
    conv_info = f"Converted {coord_file} to {requirement} object type"
66

    
67
    fil_type_err = f'The {filetype(coord_file)} file formnat is not supported'
68

    
69
    if requirement not in req_vals:
70
        logger.error(lib_err)
71
        raise NotImplementedError(lib_err)
72

    
73
    if filetype(coord_file) not in file_type_vals:
74
        logger.error(fil_type_err)
75
        raise NotImplementedError(fil_type_err)
76

    
77
    if requirement == 'rdkit':
78
        if filetype(coord_file) == 'xyz':
79
            from xyz2mol import xyz2mol
80
            ase_atms = ase.io.read(coord_file)
81
            atomic_nums = ase_atms.get_atomic_numbers().tolist()
82
            xyz_coordinates = ase_atms.positions.tolist()
83
            # TODO Add routine to read charge
84
            rd_mol_obj = xyz2mol(atomic_nums, xyz_coordinates, charge=0)
85
            logger.debug(conv_info)
86
            return rd_mol_obj
87
        elif filetype(coord_file) == 'mol':
88
            from rdkit.Chem import MolFromMolFile
89
            logger.debug(conv_info)
90
            return MolFromMolFile(coord_file, removeHs=False)
91

    
92
    if requirement == 'ase':
93
        logger.debug(conv_info)
94
        return ase.io.read(coord_file)
95

    
96

    
97
def read_coords(code, run_type, req):
98
    """Reads the atomic coordinates resulting from finished calculations.
99

100
    Given a run_type ('isolated', 'screening' or 'refinement') directory
101
    containing different subdirectories with finished calculations in every
102
    subdirectory, it reads the coordinates inside each subdirectory according to
103
    the specified code and returns a list of objects required.
104

105
    @param code: the code that produced the coordinates out of a calculation.
106
    @param run_type: the type of calculation (and also the name of the folder)
107
                     containing the calculation subdirectories.
108
    @param req: The required object type to make the list of.
109
    @return: list of collection-of-atoms objects. (rdkit.Mol, ase.Atoms, etc.)
110
    """
111
    import os
112
    if code == 'cp2k':
113
        pattern = '-pos-1.xyz'
114
    else:
115
        pattern = ''
116
    return [adapt_format(req, f'{run_type}/{conf}/{fil}')
117
            for conf in os.listdir(run_type)
118
            for fil in os.listdir(f"{run_type}/{conf}") if pattern in fil]