/modules/formats.py - DockOnSurf - Forge du Centre Blaise Pascal

dockonsurf / modules / formats.py @ e7d9c7e8

Historique | Voir | Annoter | Télécharger (9,09 ko)

       """Module for the conversion between atomic coordinates files and objects
       functions:
       confs_to_mol_list: Converts the conformers inside a rdkit mol object to a list
           of separate mol objects.
       rdkit_mol_to_ase_atoms: Converts a rdkit mol object into ase Atoms object.
       adapt_format: Converts the coordinate files into a required library object type.
       read_coords: Reads the atomic coordinates resulting from finished calculations.
       """
       import logging
       import rdkit.Chem.AllChem as Chem
       logger = logging.getLogger('DockOnSurf')
       def confs_to_mol_list(mol: Chem.rdchem.Mol, idx_lst=None):
           """Splits the conformers of a rdkit mol object into independent mol
           objects.
           Each selected conformer is written out as a MolBlock and parsed back
           (keeping hydrogens), which yields one single-conformer mol per index.
           @param mol: rdkit mol object containing at least one conformer.
           @param idx_lst: conformer indices to extract. When it is None, every
               index from 0 to the number of conformers minus one is used.
           @return: list of rdkit mol objects, one per selected conformer and in
               the same order as the indices.
           """
           if idx_lst is None:
               conf_ids = range(mol.GetNumConformers())
           else:
               conf_ids = idx_lst
           separated = []
           for conf_id in conf_ids:
               mol_block = Chem.MolToMolBlock(mol, confId=int(conf_id))
               separated.append(Chem.MolFromMolBlock(mol_block, removeHs=False))
           return separated
       def rdkit_mol_to_ase_atoms(mol: Chem.rdchem.Mol):
           """Builds an ase Atoms object out of a rdkit mol object.
           Only the conformer with id 0 is converted; a warning is logged when the
           mol object carries more than one conformer.
           @param mol: rdkit mol object, expected to contain a single conformer.
           @return ase.Atoms: Atoms object with the chemical symbols of the mol
               atoms and the positions of its conformer 0.
           """
           from ase import Atoms
           n_conformers = mol.GetNumConformers()
           if n_conformers > 1:
               logger.warning('A mol object with multiple conformers is parsed, '
                              'converting to Atoms only the first conformer.')
           elements = [atom.GetSymbol() for atom in mol.GetAtoms()]
           first_conformer = mol.GetConformer(0)
           return Atoms(symbols=elements,
                        positions=first_conformer.GetPositions())
       def add_special_atoms(symbol_pairs):
           """Allows to use custom elements with symbols not in the periodic table.
           This function adds new chemical elements to be used by ase. Every new
           custom element must have a traditional (present in the periodic table)
           partner from which its masses, covalent radius and reference state are
           copied. The tables of the ase.data module are extended, so the effect
           is global for the running interpreter.
           The function is idempotent: a custom symbol that ase already knows is
           left untouched. adapt_format and read_coords_cp2k call this function on
           every invocation; without this guard each call would append duplicate
           table entries and move the custom symbol to a new atomic number, so
           Atoms objects built from successive files would disagree on it.
           @param symbol_pairs: List of tuples containing the pairs of chemical
               symbols. Every tuple contains a pair of chemical symbols, the first
               label must be the label of the custom element and the second one
               the symbol of the reference one (traditional present on the
               periodic table).
           @return: None, the ase.data tables are modified in place.
           @raise KeyError: if the reference symbol of a pair is unknown to ase.
           """
           import numpy as np
           from ase import data
           for pair in symbol_pairs:
               custom, reference = pair[0], pair[1]
               if custom in data.atomic_numbers:
                   # Already registered (earlier call, duplicated pair or a real
                   # element): registering again would duplicate table entries.
                   continue
               # Resolve the reference before touching any table so that an
               # unknown reference symbol does not leave them half-updated.
               z_orig = data.atomic_numbers[reference]
               orig_iupac_mass = data.atomic_masses_iupac2016[z_orig]
               orig_com_mass = data.atomic_masses_common[z_orig]
               data.chemical_symbols += [custom]
               data.atomic_numbers[custom] = max(data.atomic_numbers.values()) + 1
               data.atomic_names += [custom]
               data.atomic_masses_iupac2016 = np.append(
                   data.atomic_masses_iupac2016, orig_iupac_mass)
               # atomic_masses is kept pointing at the extended IUPAC table.
               data.atomic_masses = data.atomic_masses_iupac2016
               data.atomic_masses_common = np.append(data.atomic_masses_common,
                                                     orig_com_mass)
               data.covalent_radii = np.append(data.covalent_radii,
                                               data.covalent_radii[z_orig])
               data.reference_states += [data.reference_states[z_orig]]
               # TODO Add vdw_radii, gsmm and aml (smaller length)
       def adapt_format(requirement, coord_file, spec_atms=tuple()):
           """Converts the coordinate files into a required library object type.
           Depending on the library required to use and the file type, it converts
           the coordinate file into a library-workable object.
           @param requirement: str, the library for which the conversion should be
               made. Accepted values: 'ase', 'rdkit'.
           @param coord_file: str, path to the coordinates file aiming to convert.
               Accepted file types: 'xyz', 'mol'.
           @param spec_atms: List of tuples containing pairs of new/traditional
               chemical symbols. It is only used when requirement is 'ase'.
           @return: an object the required library can work with (a rdkit mol
               object with hydrogens added for 'rdkit', an ase Atoms object for
               'ase').
           @raise NotImplementedError: if either the requested library or the
               detected file type is not among the accepted values.
           """
           import ase.io
           from ase.io.formats import filetype
           req_vals = ['rdkit', 'ase']
           file_type_vals = ['xyz', 'mol']
           # Validate the cheap argument first, so that an unsupported library is
           # reported without having to inspect the coordinates file at all.
           if requirement not in req_vals:
               lib_err = f"The conversion to the '{requirement}' library object " \
                         f"type has not yet been implemented"
               logger.error(lib_err)
               raise NotImplementedError(lib_err)
           # The file type is detected a single time and reused in every branch.
           file_type = filetype(coord_file)
           if file_type not in file_type_vals:
               fil_type_err = f'The {file_type} file format is not supported'
               logger.error(fil_type_err)
               raise NotImplementedError(fil_type_err)
           conv_info = f"Converted {coord_file} to {requirement} object type"
           if requirement == 'rdkit':
               if file_type == 'xyz':
                   from modules.xyz2mol import xyz2mol
                   # The xyz file is read through ase and the connectivity is
                   # then obtained by xyz2mol, assuming a neutral molecule.
                   ase_atms = ase.io.read(coord_file)
                   atomic_nums = ase_atms.get_atomic_numbers().tolist()
                   xyz_coordinates = ase_atms.positions.tolist()
                   rd_mol_obj = xyz2mol(atomic_nums, xyz_coordinates, charge=0)
                   logger.debug(conv_info)
                   return Chem.AddHs(rd_mol_obj)
               # Only 'mol' is left after the validation above.
               logger.debug(conv_info)
               return Chem.AddHs(Chem.MolFromMolFile(coord_file, removeHs=False))
           # Only 'ase' is left after the validation above.
           add_special_atoms(spec_atms)
           if file_type == 'xyz':
               logger.debug(conv_info)
               return ase.io.read(coord_file)
           logger.debug(conv_info)
           rd_mol = Chem.AddHs(Chem.MolFromMolFile(coord_file, removeHs=False))
           return rdkit_mol_to_ase_atoms(rd_mol)
       def read_coords_cp2k(file, spec_atoms=tuple()):
           """Reads the coordinates from a CP2K restart file and returns an
           ase.Atoms object.
           The coordinates are taken from the COORD section of the first
           FORCE_EVAL section of the file. In every coordinate line the first field
           is used as chemical symbol and the three following ones as cartesian
           coordinates; any further field (e.g. an optional molecule name) is
           ignored.
           @param file: The file to read containing the coordinates.
           @param spec_atoms: List of tuples containing the pairs of chemical
               symbols (custom symbol, reference symbol) to register in ase before
               building the Atoms object.
           @return: ase.Atoms object of the coordinates in the file.
           """
           import numpy as np
           from ase import Atoms
           from pycp2k import CP2K
           cp2k = CP2K()
           cp2k.parse(file)
           force_eval = cp2k.CP2K_INPUT.FORCE_EVAL_list[0]
           raw_coords = force_eval.SUBSYS.COORD.Default_keyword
           fields = [atom.split() for atom in raw_coords]
           symbols = [atom_fields[0] for atom_fields in fields]
           # Exactly three coordinate columns are kept, so that trailing fields do
           # not end up in the (numeric) positions array.
           positions = np.array([atom_fields[1:4] for atom_fields in fields],
                                dtype=float)
           if spec_atoms:
               # Custom symbols must be known to ase before Atoms is built.
               add_special_atoms(spec_atoms)  # TODO check usage
           return Atoms(symbols=symbols, positions=positions)
       def read_coords(conf_list, code, run_type, spec_atms=tuple()):
           """Reads the atomic coordinates resulting from finished calculations.
           Given a run_type ('isolated', 'screening' or 'refinement') directory
           containing different subdirectories with finished calculations in every
           subdirectory, it reads, from each subdirectory, the final coordinates
           resulting from the calculation and returns them as a list.
           For the 'cp2k' code the coordinates are read from the first file
           matching '<run_type>/<conf>/*-1.restart'. For any other code nothing is
           read and an empty list is returned.
           @param conf_list: List of directories where to read the coords.
           @param code: the code that produced the calculation results files.
           @param run_type: the type of calculation (and also the name of the
               folder) containing the calculation subdirectories.
           @param spec_atms: List of tuples containing pairs of new/traditional
               chemical symbols.
           @return: list with one collection-of-atoms object per entry in
               conf_list (what read_coords_cp2k returns for the 'cp2k' code).
           @raise IndexError: if, for the 'cp2k' code, a subdirectory contains no
               file matching the restart pattern.
           """
           from glob import glob
           atoms_list = []
           for conf in conf_list:
               if code == 'cp2k':
                   restart_file = glob(f"{run_type}/{conf}/*-1.restart")[0]
                   # The parsed coordinates have to be stored: they are the
                   # result this function is expected to return.
                   atoms_list.append(read_coords_cp2k(restart_file, spec_atms))
           return atoms_list
       def read_energy_cp2k(file):
           """Reads the energies of a CP2K out file and returns its final energy.
           Every line containing the total FORCE_EVAL (QS) energy label is parsed
           and the value of the last one is kept.
           @param file: The file from which the energy should be read.
           @return: The last energy on the out file as a float, or None if the
               file contains no energy line.
           @raise ValueError: if the text after the label of an energy line cannot
               be converted to float.
           """
           energy = None
           # The context manager closes the file even if parsing a line fails.
           with open(file, 'r') as out_fh:
               for line in out_fh:
                   if "ENERGY| Total FORCE_EVAL ( QS ) energy (a.u.):" in line:
                       energy = float(line.strip().split(':')[1])
           return energy
       def read_energies(conf_list, code, run_type):
           """Collects the final energies of a set of finished calculations.
           For the 'cp2k' code every entry of conf_list is handed, unchanged, to
           read_energy_cp2k, which opens it as a file. For any other code no
           energy is read.
           NOTE(review): entries are described as directories elsewhere, yet they
           are opened as files here; confirm what callers actually pass.
           @param conf_list: List of entries from which to read the energy.
           @param code: the code that produced the calculation results files.
           @param run_type: the type of calculation ('isolated', 'screening' or
               'refinement'); here it is only used in the error message.
           @return: numpy array with one energy per entry of conf_list.
           @raise FileNotFoundError: if no energy was collected at all.
           """
           import numpy as np
           collected = [read_energy_cp2k(entry) for entry in conf_list
                        if code == 'cp2k']
           if not collected:
               err = f"No results found on {run_type}"
               logger.error(err)
               raise FileNotFoundError(err)
           return np.array(collected)

Chimie Théorique » scripts_chimie4psmn » DockOnSurf

dockonsurf / modules / formats.py @ e7d9c7e8