/ - Diff - DockOnSurf - Forge du Centre Blaise Pascal

Révision 86112fec

     #####################################################
     # Please change here the paths to your directories: #
     #####################################################
     # Every variable below is assigned first and exported in one go at the end
     # of this section, so the other DockOnSurf scripts can read them.

     # Directory of the DockOnSurf script:
     DockOnSurf_path="/home/sblanck/DockOnSurf_hematite"

     # Directory receiving the results for the molecule alone.
     # A subdirectory named after the studied molecule is created automatically.
     Molecule_results_path="/home/sblanck/cp2k/hematite_molecules"

     # Directory receiving the results for the adsorption structures.
     # A subdirectory named after the studied molecule is created automatically.
     MolOnSurf_results_path="/home/sblanck/cp2k/hematite"

     # Directory where the surface xyz files are stored:
     Surface_path="/home/sblanck/DockOnSurf_hematite/surfaces"

     # Directory where the molecule mol files are stored:
     Molecule_path="/home/sblanck/DockOnSurf_hematite/molecules"

     # CP2K input file for the molecule alone.
     # Please check that its PROJECT_NAME is "molecule".
     CP2K_input_molecule="/home/sblanck/DockOnSurf_hematite/CP2K_input/molecule.inp"

     # CP2K input file for the molecule on the surface.
     # Please check that its PROJECT_NAME is "surf_molecule".
     CP2K_input_MolOnSurf="/home/sblanck/DockOnSurf_hematite/CP2K_input/adsorption.inp"

     # Submission script for the CP2K calculations.
     # Please check that the name of the job is "XXXX".
     CP2K_sub="/home/sblanck/DockOnSurf_hematite/CP2K_input/gammakpoint.j"

     export DockOnSurf_path Molecule_results_path MolOnSurf_results_path \
       Surface_path Molecule_path \
       CP2K_input_molecule CP2K_input_MolOnSurf CP2K_sub

     #!/bin/bash
     # Locate and load the user configuration file named "DockOnSurf_path".
     # The user name is taken from the third component of the current directory
     # (i.e. the script is expected to be started somewhere below /home/<user>/).
     user="$(pwd | awk -F"/" '{print $3}')"
     # Keep only the first match: several matches would otherwise be handed to
     # `source` as one file name plus positional parameters.
     file_path="$(find "/home/${user}" -name "DockOnSurf_path" -print -quit)"
     if [[ -z "${file_path}" ]]; then
       # Without the configuration every path variable used later is empty,
       # including the ones used by the cleanup `rm -r` commands.
       echo "Error: no DockOnSurf_path configuration file found under /home/${user}" >&2
       exit 1
     fi
     ### Set up the directory layout (load the exported paths)
     source "${file_path}"
     ### Read the cell vectors from the CP2K adsorption input
     # _cell_component KEY FIELD
     #   Prints field FIELD of every line of ${CP2K_input_MolOnSurf} that contains
     #   "KEY " and whose first field starts with KEY (the A, B and C cell-vector
     #   lines). A value containing "E+00" is cut at its first "E", which leaves
     #   only the mantissa.
     #   The file is passed by redirection so that an unset or wrong path fails
     #   immediately instead of making awk wait on standard input.
     _cell_component() {
       local value
       value="$(awk -v key="$1" -v col="$2" \
         'index($0, key " ") && $1 ~ ("^" key) {print $col}' \
         < "${CP2K_input_MolOnSurf}")"
       if [[ "${value}" == *"E+00"* ]]; then
         value="${value%%E*}"
       fi
       printf '%s\n' "${value}"
     }
     # a1..a3, b1..b3 and c1..c3 are fields 2..4 of the A, B and C lines; each
     # one is exported for the scripts started later.
     for _cell_pair in a:A b:B c:C; do
       for _cell_index in 1 2 3; do
         export "${_cell_pair%%:*}${_cell_index}=$(_cell_component "${_cell_pair##*:}" "$((_cell_index + 1))")"
       done
     done
     unset _cell_pair _cell_index
     ### Initial parameters
     echo 'Name of the molecule (without .mol) '
     read -r molecule
     # An empty name would make every later path built from it (result
     # directories, cleanup globs) point at the wrong place, so stop right away.
     if [[ -z "${molecule}" ]]; then
       echo 'Error: no molecule name given' >&2
       exit 1
     fi
     echo ''
     module load rdkit
     ### Conformer generation
     "${DockOnSurf_path}/modules/generation_conformeres.py" "${molecule}" 5000
     echo ' -- CONFORMERS HAVE BEEN GENERATED --'
     ### Conversion of the .mol files to .xyz
     "${DockOnSurf_path}/modules/mol_to_xyz.sh" "${molecule}"
     ### Launch the calculations for the molecule alone
     "${DockOnSurf_path}/modules/launch_cp2k_molecule_seule.sh" "${molecule}"
     ### Wait until all the calculations have finished
     # Only the first 10 characters of the name are searched for in the qstat
     # output (NOTE(review): presumably because qstat shortens job names - confirm).
     mol="${molecule:0:10}"
     while [[ "$(qstat | grep -c -- "${mol}")" != 0 ]]; do
       sleep 300
     done
     echo ' -- ALL CALCULATIONS FOR MOLECULE ALONE HAVE BEEN DONE --'
     ### Remove the files that are no longer needed
     # ${var:?} aborts the script when a variable is empty or unset, instead of
     # letting the glob collapse to a path rooted at "/".
     rm -r -- "${Molecule_results_path:?}/${molecule:?}"/*/*RESTART*
     rm -r -- "${Molecule_results_path:?}/${molecule:?}"/*/*restart*
     rm -r -- "${Molecule_results_path:?}/${molecule:?}"/*/*BFGS*
     rm -r -- "${Molecule_results_path:?}/${molecule:?}"/*/*.p*
     echo ' '
     echo ' -- RESULTS --'
     # _energy_is_lower A B
     #   Succeeds when A and B are both decimal numbers and A < B numerically.
     #   A string comparison ([[ A > B ]]) is not used because its outcome depends
     #   on the locale and is wrong when the numbers have different digit counts.
     _energy_is_lower() {
       awk -v a="$1" -v b="$2" 'BEGIN {
         num = "^[-+]?([0-9]+[.]?[0-9]*|[.][0-9]+)([eE][-+]?[0-9]+)?$"
         exit !((a ~ num) && (b ~ num) && ((a + 0) < (b + 0)))
       }'
     }
     # Keep the structure with the lowest energy. The reference starts at 0, so
     # only negative energies are ever selected.
     min_energy_molecule=0
     for struct in "${Molecule_results_path}/${molecule}/${molecule}"*; do
       # Last field of the last line containing "ENERGY" in the CP2K output.
       energy_molecule="$(awk '/ENERGY/{E=$NF} END{print E}' "${struct}"/*"${molecule}".out)"
       if _energy_is_lower "${energy_molecule}" "${min_energy_molecule}"; then
         min_energy_molecule=${energy_molecule}
         numero_mol_stable="${struct##*/}"
       fi
     done
     echo "   Most stable structure for molecule alone: ${numero_mol_stable} with energy ${min_energy_molecule} Ha"
     echo ' '
     # Interactive input of the adsorption parameters.
     echo ' -- ARGUMENTS FOR ADSORPTION -- '
     echo ' '
     echo 'Name of the file to use for the surface (without .xyz) '
     read -r surface
     echo 'Number of the central atom of the molecule'
     read -r center
     read -r -a list_atom_1_mol -p 'List of atoms of the molecule to adsorb '
     read -r -a list_atom_2_mol -p 'List of neighboring atoms of the molecule (linked to the atoms to adsorb) '
     nombre_atom_mol=${#list_atom_1_mol[@]}
     # Both lists must have the same length: each adsorbing atom is paired with
     # one neighbouring atom.
     if (( ${#list_atom_2_mol[@]} != nombre_atom_mol )); then
       echo "  Problem: number of atoms different in the two lists" >&2
       # Report the failure through the exit status (a bare `exit` returned 0).
       exit 1
     fi
     read -r -a list_atom_surf -p 'List of the atoms of the surface that can be adsorption sites '
     echo 'Value of cutoff energy for reoptimisation (eV)'
     read -r cutoff
     echo ''
     # _wait_for_jobs PATTERN
     #   Polls qstat every 300 seconds until none of its lines matches PATTERN.
     _wait_for_jobs() {
       while [[ "$(qstat | grep -c -- "$1")" != 0 ]]; do
         sleep 300
       done
     }
     ### Adsorb the molecule on the surface
     # Each atom list is passed as ONE space-separated argument.
     "${DockOnSurf_path}/modules/script_add_adsorbate+diss.sh" "${molecule}" "${surface}" "${list_atom_1_mol[*]}" "${list_atom_2_mol[*]}" "${list_atom_surf[*]}"
     echo ' -- ADSORPTION SUTRUCTURES HAVE BEEN CALCULATED --'
     # Changes labels of elements to its original definition, thus allowing different kinds of same element (Fe1 Fe2) with different properties (e.g. spin state in compounds with magnetic coupling)
     "${DockOnSurf_path}/modules/fe_change.sh" "${molecule}" "${surface}" "${MolOnSurf_results_path}/${molecule}"
     ### Launch the calculations for the adsorbed molecules
     "${DockOnSurf_path}/modules/launch_script+diss.sh" "${molecule}"
     ### Wait until all the calculations have finished
     # Jobs are searched for as "surf_" followed by the first 5 characters of the
     # molecule name.
     mol2="${molecule:0:5}"
     _wait_for_jobs "surf_${mol2}"
     echo ' -- ALL CALCULATIONS FOR ADSORPTION STRUCTURES HAVE BEEN DONE --'
     ### Remove the files that are no longer needed
     # ${var:?} aborts when a variable is empty instead of letting the glob
     # collapse to a path rooted at "/".
     rm -r -- "${MolOnSurf_results_path:?}/${molecule:?}"/*/*RESTART*
     rm -r -- "${MolOnSurf_results_path:?}/${molecule:?}"/*/*BFGS*
     rm -r -- "${MolOnSurf_results_path:?}/${molecule:?}"/*/*.p*
     ### Clustering and launch of the cluster-centre structures
     # The first line of the xyz file holds the number of surface atoms; the
     # central atom of the molecule is renumbered by adding that count.
     nb_at_surf="$(awk 'NR==1{print $1}' "${Surface_path}/${surface}.xyz")"
     num_center="$(( nb_at_surf + center ))"
     "${DockOnSurf_path}/modules/script_grandes_molecules+diss.sh" "${molecule}" "${num_center}" "${nb_at_surf}" "${cutoff}"
     echo ' -- CLUSTERING HAS BEEN DONE AND CLUSTER CENTERS CALCULATIONS HAVE BEEN LAUNCHED --'
     ### Wait until all the calculations have finished
     _wait_for_jobs "surf_${mol2}"
     echo ' -- ALL CLUSTER CENTERS CALCULATIONS HAVE BEEN DONE --'
     echo " "
     echo ' -- RESULTS --'
     echo ' '
     echo "Most stable structure for molecule alone: ${numero_mol_stable} with energy ${min_energy_molecule} Ha"
     # _energy_is_lower A B
     #   Succeeds when A and B are both decimal numbers and A < B numerically.
     #   A string comparison ([[ A > B ]]) is not used because its outcome depends
     #   on the locale and is wrong when the numbers have different digit counts.
     _energy_is_lower() {
       awk -v a="$1" -v b="$2" 'BEGIN {
         num = "^[-+]?([0-9]+[.]?[0-9]*|[.][0-9]+)([eE][-+]?[0-9]+)?$"
         exit !((a ~ num) && (b ~ num) && ((a + 0) < (b + 0)))
       }'
     }
     # Keep the adsorption structure with the lowest energy. The reference
     # starts at 0, so only negative energies are ever selected.
     min_energy_global=0
     for global_struct in "${MolOnSurf_results_path}/${molecule}/relaunched_calculations"/*; do
       # Entries ending in .inp or .j are skipped; every other entry is treated
       # as a result directory.
       if [[ "${global_struct}" == *.inp || "${global_struct}" == *.j ]]; then
         continue
       fi
       # Last field of the last line containing "ENERGY" in the CP2K output.
       energy_global="$(awk '/ENERGY/{E=$NF} END{print E}' "${global_struct}"/*"${molecule}".out)"
       if _energy_is_lower "${energy_global}" "${min_energy_global}"; then
         min_energy_global=${energy_global}
         numero_global_stable="${global_struct##*/}"
       fi
     done
     echo "Most stable structure for adsorption: ${numero_global_stable} with energy ${min_energy_global} Ha"

     #!/usr/bin/env python
     # -*- coding: utf-8 -*-
     import sys
     import numpy as np
     import ase
     from ase import io
     from ase.io import read
     from ase.io import write
     from ase import Atom
     from ase import build
     from ase.build import add_adsorbate
     from ase import constraints
     from ase.constraints import FixAtoms
     from ase import neighborlist
     from ase.neighborlist import NeighborList
     from ase import utils
     import ase_Fe1_Fe2
     if len(sys.argv) < 7 :
         print "This script has to be used with at least 6 arguments."
         print "1. Name of the file containing the surface"
         print "2. Atom number of the atom of the surface where the adsorbate will be adsorbed"
         print "3. Name of the file containing the molecule to be adsorbed"
         print "4. Atom number of the atom of the molecule that will be adsorbed of the surface"
         print "5. Distance between the molecule and the surface"
         print "6. Name of the output file"
         print "Other arguments can be added before the name of the output file :"
         print "  Second atom of the molecule"
         print "  phi angle (rotation around z)"
         print "  theta angle (rotation around new x)"
         print "  psi angle (rotation around new z) "
         sys.exit(1)
     ##########################################################
     # Definition of the function used to compute the angles
     ##########################################################
     def get_proper_angle(v1, v2, degrees=True):
         """Return the angle between the vectors v1 and v2.

         Both vectors are normalised before their dot product is taken. A
         cosine whose absolute value reaches or exceeds 1 is replaced by its
         sign before the arccosine is evaluated. The result is in degrees
         when `degrees` is true, in radians otherwise.
         """
         unit_1 = v1/np.linalg.norm(v1)
         unit_2 = v2/np.linalg.norm(v2)
         cosine = np.dot(unit_1, unit_2)
         if np.abs(cosine) >= 1:
             cosine = np.sign(cosine)
         angle = np.arccos(cosine)
         if degrees:
             return angle*180/np.pi
         return angle
     ##########################################
     # Reading of the basic arguments
     ##########################################
     surface_file = sys.argv[1]
     atom_surface = int(sys.argv[2])
     molecule_file = sys.argv[3]
     atom_molecule = int(sys.argv[4])
     distance = int(sys.argv[5])
     # With only the 6 basic arguments the output name is the 6th one; with the
     # optional rotation arguments it is read later, from sys.argv[10].
     if len(sys.argv) < 8 :
         output = sys.argv[6]
     # Default collision state. The collision test only runs in the rotation
     # branch, which starts from this same value (0.5 = neither "collision" nor
     # "collision corrected"). Defining it here keeps the later
     # `if collision == 0` test from raising NameError in the 6-argument mode.
     collision = 0.5
     # Both files are loaded with ase.io.read.
     surface = read(surface_file)
     molecule = read(molecule_file)
     ####################################
     # Reading of the atomic positions
     ####################################
     # Surface atom on which the molecule is adsorbed
     atom_surf_1 = surface[atom_surface].position
     x_atom_surf_1, y_atom_surf_1, z_atom_surf_1 = atom_surf_1[0], atom_surf_1[1], atom_surf_1[2]
     # Atom of the molecule that binds to the surface
     atom_mol_1 = molecule[atom_molecule].position
     x_atom_mol_1, y_atom_mol_1, z_atom_mol_1 = atom_mol_1[0], atom_mol_1[1], atom_mol_1[2]
     # Number of atoms in the molecule
     nb_atom_mol = int(len(molecule))
     #######################################################
     # Case with a larger number of arguments (rotations)
     #######################################################
     # This branch orients the molecule before adsorption: it first aligns the
     # bond "adsorbing atom -> second atom" with the y axis, applies the
     # requested Euler rotation, then tries to rotate the molecule further when
     # some of its atoms would end up too close to (or below) the surface atom.
     if len(sys.argv) > 7 :
         #######################
         # Reading the arguments
         #######################
         atom_molecule_2 = int(sys.argv[6])
         phi = int(sys.argv[7])
         theta = int(sys.argv[8])
         psi = int(sys.argv[9])
         output = sys.argv[10]
         # +/- 0.01 windows around the three angles; they are not referenced
         # again in this block.
         theta_min = theta - 0.01
         theta_max = theta + 0.01
         phi_min = phi - 0.01
         phi_max = phi + 0.01
         psi_min = psi - 0.01
         psi_max = psi + 0.01
         atom_mol_2 = molecule[atom_molecule_2].position
         x_atom_mol_2 = atom_mol_2[0]
         y_atom_mol_2 = atom_mol_2[1]
         z_atom_mol_2 = atom_mol_2[2]
         ##################################
         # Rotation with the Euler angles
         ##################################
         # Initialisation: put the second atom of the molecule on the Oy axis
         vect_y = [0, 1, 0]
         vect_atom_mol_2 = molecule[atom_molecule_2].position - molecule[atom_molecule].position
         # Angle in degrees (get_proper_angle default) between Oy and the bond vector
         angle_1 = get_proper_angle(vect_y, vect_atom_mol_2)
         if (angle_1 != 0) :
             # Rotate about the normal of the plane (Oy, bond), keeping the
             # adsorbing atom fixed.
             # NOTE(review): assumes Atoms.rotate takes the angle first and in
             # degrees - confirm against the installed ASE version.
             vect_normal = np.cross(vect_y, vect_atom_mol_2)
             molecule.rotate(-angle_1, vect_normal, center=(x_atom_mol_1,y_atom_mol_1,z_atom_mol_1))
             # Check that the bond is now within 0.01 degree of Oy; a failure is
             # only reported, the script continues.
             vect_atom_mol_2_verif = molecule[atom_molecule_2].position - molecule[atom_molecule].position
             angle_1_verif = get_proper_angle(vect_atom_mol_2_verif, vect_y)
             angle_max = 0.01
             angle_min = -0.01
             if (angle_1_verif < angle_min or angle_1_verif > angle_max) :
                 print 'Error in initialisation'
         # Euler rotation about the adsorbing atom
         molecule.euler_rotate(phi, theta, psi, center=(x_atom_mol_1,y_atom_mol_1,z_atom_mol_1))
         # Correction of the collisions between the molecule and the surface
         # An atom "collides" when its final height would be lower than 1 unit
         # above the surface atom.
         z_atom_surf_max = z_atom_surf_1 + 1
         # collision states: 0.5 = no collision found so far, 1 = collision,
         # 0 = collision removed by one of the corrective rotations below.
         collision = 0.5
         rotation_tot = 0
         nb_atom_collision_min = 0
         # rotation_opt / rotation_opt_2 record the rotation giving the fewest
         # colliding atoms; they are not read again in this block.
         rotation_opt = 0
         rotation_modif = 5
         # First pass: count the atoms that would collide.
         for i in range(0,nb_atom_mol) :
             atom_test = molecule[i].position
             z_atom_test = atom_test[2]
             # Height of the atom relative to the adsorbing atom, shifted to
             # `distance` above the surface atom.
             z_atom_test_final = z_atom_test - z_atom_mol_1 + z_atom_surf_1 + distance
             if (z_atom_test_final < z_atom_surf_max) :
                 collision = 1
                 nb_atom_collision_min += 1
         if collision == 1 :
             print 'Collision between the molecule and the surface - modification of theta angle'
         vect_z = [0, 0, 1]
         # Rotate in steps of rotation_modif (5) until no atom collides or the
         # accumulated rotation is no longer below 354.
         while (collision == 1 and rotation_tot < 354) :
             vect_atom_mol_2_post_euler = molecule[atom_molecule_2].position - molecule[atom_molecule].position
             # Rotation axis: perpendicular to the bond and to Oz.
             # NOTE(review): with `and`, the fallback axis is also used when
             # only one of the x / y components is zero - confirm this is intended.
             if (vect_atom_mol_2_post_euler[0] != 0 and vect_atom_mol_2_post_euler[1] != 0) :
                 vect_rotation_theta = np.cross(vect_atom_mol_2_post_euler, vect_z)
             else :
                 vect_rotation_theta = [0.0001, 0.0001 , 0]
             molecule.rotate(rotation_modif, vect_rotation_theta, center=(x_atom_mol_1,y_atom_mol_1,z_atom_mol_1))
             rotation_tot += 5
             # Re-count the colliding atoms after this step.
             collision = 0
             nb_atom_collision = 0
             for i in range(0,nb_atom_mol) :
                 atom_test = molecule[i].position
                 z_atom_test = atom_test[2]
                 z_atom_test_final = z_atom_test - z_atom_mol_1 + z_atom_surf_1 + distance
                 if (z_atom_test_final < z_atom_surf_max) :
                     nb_atom_collision += 1
                     collision = 1
             if (nb_atom_collision < nb_atom_collision_min) :
                 nb_atom_collision_min = nb_atom_collision
                 rotation_opt = rotation_tot
             # While some atoms still collide, also try rotating the molecule
             # about the bond axis itself, again in steps of 5.
             if nb_atom_collision_min != 0 :
                 rotation_tot_2 = 0
                 rotation_opt_2 = 0
                 while (collision == 1 and rotation_tot_2 < 354) :
                     vect_atom_mol_2_post_euler = molecule[atom_molecule_2].position - molecule[atom_molecule].position
                     molecule.rotate(rotation_modif, vect_atom_mol_2_post_euler, center=(x_atom_mol_1,y_atom_mol_1,z_atom_mol_1))
                     rotation_tot_2 += 5
                     collision = 0
                     nb_atom_collision = 0
                     for i in range(0,nb_atom_mol) :
                         atom_test = molecule[i].position
                         z_atom_test = atom_test[2]
                         z_atom_test_final = z_atom_test - z_atom_mol_1 + z_atom_surf_1 + distance
                         if (z_atom_test_final < z_atom_surf_max) :
                             nb_atom_collision += 1
                             collision = 1
                     if (nb_atom_collision < nb_atom_collision_min) :
                         nb_atom_collision_min = nb_atom_collision
                         rotation_opt_2 = rotation_tot_2
         # Nothing is printed when collision is still 0.5 (no collision at all).
         if collision == 0 :
             print 'Collision corrected'
         elif collision == 1 :
             print 'Error: the collision could not be corrected'
             # Split the output path: num_coll is its last component and
             # molecule_directory is the path with its last two components
             # removed. The failure is appended to "<molecule_directory>/errors".
             pos1 = output.rfind('/')
             pos = pos1 + 1
             num_coll=output[pos:]
             j = output[:pos1]
             pos2 = j.rfind('/')
             molecule_directory = j[:pos2]
             fichier = open("%s/errors" % molecule_directory , "a")
             fichier.write("Adsorption %s : ERROR the collision could not be corrected\n" % num_coll )
             fichier.close()
     ############################################
     # Adsorption of the molecule on the surface
     ############################################
     # The molecule is placed above the (x, y) position of the chosen surface
     # atom, anchored by its atom number `atom_molecule`, then the combined
     # structure is written to "<output>.xyz".
     site_xy = (x_atom_surf_1, y_atom_surf_1)
     add_adsorbate(surface, molecule, distance, site_xy, mol_index=atom_molecule)
     out = "%s.xyz" % output
     write(out, surface)
     ############################
     # Dissociation if needed
     ############################
     # Only runs when `collision` is exactly 0, i.e. after a collision has been
     # detected and then removed by the corrective rotations.
     # NOTE(review): the "no collision at all" state (0.5) skips this section -
     # confirm that this is intended.
     if collision == 0 :
         # Reload the bare surface from file.
         surface = read(surface_file)
         # Plain assignment: molecule_reminder refers to the SAME Atoms object
         # as molecule (no copy is made).
         molecule_reminder = molecule
         # Neighbour list of the molecule built from ASE's natural cutoffs
         list_cutoffs = utils.natural_cutoffs(molecule)
         nl=NeighborList(list_cutoffs, self_interaction=False, bothways=True)
         nl.update(molecule)
         neighbor_indices, offsets = nl.get_neighbors(atom_molecule)
         symbols = molecule.get_chemical_symbols()
         # Chemical symbols of the neighbours of the adsorbing atom
         neighbors_symbols=[]
         for i in neighbor_indices:
                 neighbors_symbols.append(symbols[i])
         nb_neighbors = len(neighbor_indices)
         # diss becomes 1 when at least one neighbour is a hydrogen; H_atom keeps
         # the index of the last hydrogen neighbour found.
         diss=0
         for i in range(0,nb_neighbors):
             if neighbors_symbols[i] == "H":
                 diss=1
                 H_atom=neighbor_indices[i] #nb of the H atom
                 print "Dissociation possible"
         if diss == 1:
             # Neighbours of the adsorption site on the bare surface
             list_cutoffs_surface = utils.natural_cutoffs(surface)
             nl2=NeighborList(list_cutoffs_surface, self_interaction=False, bothways=True)
             nl2.update(surface)
             neighbor_indices_surf, offsets_surf = nl2.get_neighbors(atom_surface)
             symbols_surf = surface.get_chemical_symbols()
             # One dissociated structure is written per eligible neighbour.
             for i in neighbor_indices_surf:
                 surface = read(surface_file)
                 # Rebinds the same object (see molecule_reminder above), so a
                 # translation applied in an earlier iteration is still present.
                 molecule = molecule_reminder
                 neighbor_surf_coord = surface[i].position
                 coord_H_atom=molecule[H_atom].position
                 # Eligible neighbour: an "O" atom lying higher than 1 unit
                 # below the adsorption site.
                 if (neighbor_surf_coord[2] > z_atom_surf_1-1 and symbols_surf[i] == "O"):
                     # Vector moving the H atom to 1 unit above that O atom
                     # (computed from the current H position).
                     vector_H_diss=[neighbor_surf_coord[0]-coord_H_atom[0], neighbor_surf_coord[1]-coord_H_atom[1], neighbor_surf_coord[2]+1-coord_H_atom[2]]
                     # Per-atom displacement list: zero for every atom except
                     # the H atom.
                     translation_matrix_dissociation=[]
                     for k in range (0, nb_atom_mol):
                         if k != H_atom:
                             translation_matrix_dissociation.append([0,0,0])
                         else:
                             translation_matrix_dissociation.append(vector_H_diss)
                     molecule.translate(translation_matrix_dissociation)
                     add_adsorbate(surface, molecule, distance, (x_atom_surf_1,y_atom_surf_1), mol_index=atom_molecule)
                     # Written as "<output>_diss_<index of the O atom>.xyz"
                     output_diss = output + "_diss_" + str(i) + ".xyz"
                     write(output_diss, surface)

     #!/usr/bin/env python
     # -*- coding: utf-8 -*-
     import ase
     from ase import Atom
     from ase import data
     import numpy as np
     # Registers three extra atom labels (Fe1, Fe2, O1) in the ase.data tables.
     # Every table receives, in this order, a copy of the Fe entry for Fe1, a
     # copy of the Fe entry for Fe2 and a copy of the O entry for O1.

     # Chemical symbols of the new types of atoms
     data.chemical_symbols += ['Fe1', 'Fe2', 'O1']
     # Atomic numbers: each new label maps to the number of its parent element
     Z_Fe = data.atomic_numbers['Fe']
     Z_O = data.atomic_numbers['O']
     for _label, _number in (('Fe1', Z_Fe), ('Fe2', Z_Fe), ('O1', Z_O)):
         data.atomic_numbers[_label] = _number
     # Atomic names
     data.atomic_names += ['Iron', 'Iron', 'Oxygen']
     # Atomic masses (IUPAC 2016 table, also used as the default mass table)
     Fe_mass = data.atomic_masses_iupac2016[Z_Fe]
     O_mass = data.atomic_masses_iupac2016[Z_O]
     data.atomic_masses_iupac2016 = np.append(data.atomic_masses_iupac2016, [Fe_mass, Fe_mass, O_mass])
     data.atomic_masses = data.atomic_masses_iupac2016
     # Atomic masses (legacy table)
     Fe_mass = data.atomic_masses_legacy[Z_Fe]
     O_mass = data.atomic_masses_legacy[Z_O]
     data.atomic_masses_legacy = np.append(data.atomic_masses_legacy, [Fe_mass, Fe_mass, O_mass])
     # Covalent radii
     Fe_radius = data.covalent_radii[Z_Fe]
     O_radius = data.covalent_radii[Z_O]
     data.covalent_radii = np.append(data.covalent_radii, [Fe_radius, Fe_radius, O_radius])
     # Reference states and ground-state magnetic moments
     data.reference_states += [data.reference_states[Z_Fe]] * 2 + [data.reference_states[Z_O]]
     Fe_gsmm = data.ground_state_magnetic_moments[Z_Fe]
     O_gsmm = data.ground_state_magnetic_moments[Z_O]
     data.ground_state_magnetic_moments = np.append(data.ground_state_magnetic_moments, [Fe_gsmm, Fe_gsmm, O_gsmm])
     # Van der Waals radii
     Fe_vdw_r = data.vdw_radii[Z_Fe]
     O_vdw_r = data.vdw_radii[Z_O]
     data.vdw_radii = np.append(data.vdw_radii, [Fe_vdw_r, Fe_vdw_r, O_vdw_r])

     #!/usr/bin/env python
     __doc__ = \
     """
     Calculate Root-mean-square deviation (RMSD) between structure A and B, in XYZ
     or PDB format, using transformation and rotation.
     For more information, usage, example and citation read more at
     https://github.com/charnley/rmsd
     """
     __version__ = '1.3.2'
     import copy
     import re
     import numpy as np
     from scipy.optimize import linear_sum_assignment
     from scipy.spatial.distance import cdist
     # The six orderings (permutations) of the three axis indices 0, 1, 2.
     # NOTE(review): presumably used to try every axis swap when aligning two
     # structures - the consumer is not visible in this part of the file.
     AXIS_SWAPS = np.array([
         [0, 1, 2],
         [0, 2, 1],
         [1, 0, 2],
         [1, 2, 0],
         [2, 1, 0],
         [2, 0, 1]])
     # The eight sign combinations (+1 / -1) for the three axes; a -1 entry
     # corresponds to mirroring the matching coordinate.
     AXIS_REFLECTIONS = np.array([
         [1, 1, 1],
         [-1, 1, 1],
         [1, -1, 1],
         [1, 1, -1],
         [-1, -1, 1],
         [-1, 1, -1],
         [1, -1, -1],
         [-1, -1, -1]])
     def rmsd(V, W):
         """
         Root-mean-square deviation between two paired sets of points.

         Parameters
         ----------
         V : array
             (N,D) matrix, N points of dimension D.
         W : array
             (N,D) matrix, N points of dimension D.

         Returns
         -------
         rmsd : float
             Square root of the summed squared coordinate differences divided
             by the number of points in V.
         """
         n_dims = len(V[0])
         n_points = len(V)
         total = 0.0
         # Points are paired in order; each pair adds its squared distance.
         for point_v, point_w in zip(V, W):
             total += sum((point_v[k] - point_w[k])**2.0 for k in range(n_dims))
         return np.sqrt(total/n_points)
     def kabsch_rmsd(P, Q, translate=False):
         """
         RMSD between P and Q after rotating P onto Q with the Kabsch algorithm.

         Parameters
         ----------
         P : array
             (N,D) matrix, N points of dimension D.
         Q : array
             (N,D) matrix, N points of dimension D.
         translate : bool
             When true, both sets are first shifted so that their centroids
             sit at the origin.

         Returns
         -------
         rmsd : float
             Root-mean-square deviation after the rotation.
         """
         if translate:
             Q = Q - centroid(Q)
             P = P - centroid(P)
         return rmsd(kabsch_rotate(P, Q), Q)
     def kabsch_rotate(P, Q):
         """
         Rotate P onto Q with the rotation matrix given by the Kabsch algorithm.

         Parameters
         ----------
         P : array
             (N,D) matrix, N points of dimension D.
         Q : array
             (N,D) matrix, N points of dimension D.

         Returns
         -------
         P : array
             (N,D) matrix: the points of P after rotation.
         """
         # Apply the rotation to every row of P.
         return np.dot(P, kabsch(P, Q))
     def kabsch(P, Q):
         """
         Rotation matrix that best maps the paired points P onto Q (Kabsch
         algorithm). Both sets are expected to be centred on their centroid
         before the call; no translation is done here.

         Steps performed:
         - covariance matrix of the two sets,
         - singular value decomposition of that matrix,
         - sign correction so that the result is a proper (right-handed)
           rotation.
         For more info see http://en.wikipedia.org/wiki/Kabsch_algorithm

         Parameters
         ----------
         P : array
             (N,D) matrix, N points of dimension D.
         Q : array
             (N,D) matrix, N points of dimension D.

         Returns
         -------
         U : matrix
             Rotation matrix (D,D)
         """
         covariance = np.dot(np.transpose(P), Q)
         left, singular_values, right = np.linalg.svd(covariance)
         # A negative determinant product means the decomposition contains a
         # reflection: flip the last singular value and the last column.
         if (np.linalg.det(left) * np.linalg.det(right)) < 0.0:
             singular_values[-1] = -singular_values[-1]
             left[:, -1] = -left[:, -1]
         return np.dot(left, right)
     def quaternion_rmsd(P, Q):
         """
         RMSD between P and Q after rotating P onto Q with the quaternion
         method, based on doi:10.1016/1049-9660(91)90036-O

         Parameters
         ----------
         P : array
             (N,D) matrix, N points of dimension D.
         Q : array
             (N,D) matrix, N points of dimension D.

         Returns
         -------
         rmsd : float
         """
         rotated = np.dot(P, quaternion_rotate(P, Q))
         return rmsd(rotated, Q)
     def quaternion_transform(r):
         """
         Build the rotation matrix associated with the quaternion r: the upper
         left 3x3 part of transpose(makeW(r)) times makeQ(r).
         Note: translation will be zero when the centroids of each molecule
         are the same.
         """
         product = makeW(*r).T.dot(makeQ(*r))
         return product[:3, :3]
     def makeW(r1, r2, r3, r4=0):
         """
         4x4 "W" matrix of the quaternion (r1, r2, r3, r4) used by the
         quaternion rotation.
         """
         rows = [
             [r4, r3, -r2, r1],
             [-r3, r4, r1, r2],
             [r2, -r1, r4, r3],
             [-r1, -r2, -r3, r4],
         ]
         return np.asarray(rows)
     def makeQ(r1, r2, r3, r4=0):
         """
         4x4 "Q" matrix of the quaternion (r1, r2, r3, r4) used by the
         quaternion rotation.
         """
         rows = [
             [r4, -r3, r2, r1],
             [r3, r4, -r1, r2],
             [-r2, r1, r4, r3],
             [-r1, -r2, -r3, r4],
         ]
         return np.asarray(rows)
     def quaternion_rotate(X, Y):
         """
         Calculate the rotation matrix that maps X onto Y with the quaternion
         method.

         Parameters
         ----------
         X : array
             (N,D) matrix, N points of dimension D.
         Y : array
             (N,D) matrix, N points of dimension D.

         Returns
         -------
         rot : matrix
             Rotation matrix (D,D)
         """
         N = X.shape[0]
         # One 4x4 product transpose(Q(x_k)) . W(y_k) per pair of points; the
         # previously computed but never used W - Q difference is dropped.
         Qt_dot_W = np.asarray(
             [np.dot(makeQ(*X[k]).T, makeW(*Y[k])) for k in range(N)])
         A = np.sum(Qt_dot_W, axis=0)
         # The quaternion is the eigenvector belonging to the largest
         # eigenvalue of the summed matrix.
         eigenvalues, eigenvectors = np.linalg.eigh(A)
         r = eigenvectors[:, eigenvalues.argmax()]
         return quaternion_transform(r)
     def centroid(X):
         """
         Mean position of the points of X, taken separately along every
         coordinate direction.
         https://en.wikipedia.org/wiki/Centroid
         C = sum(X)/len(X)

         Parameters
         ----------
         X : array
             (N,D) matrix, N points of dimension D.

         Returns
         -------
         C : array
             Mean of X along its first axis (one value per dimension).
         """
         return X.mean(axis=0)
     def reorder_distance(p_atoms, q_atoms, p_coord, q_coord):
         """
         Compute an index array that re-orders the atoms of Q to match P,
         element by element, by ranking the atoms of each element on the norm
         of their coordinates (their distance from the origin, which is the
         centroid when the coordinates were centred beforehand).

         Parameters
         ----------
         p_atoms : array
             (N,1) matrix holding the atom names of P
         q_atoms : array
             (N,1) matrix holding the atom names of Q
         p_coord : array
             (N,D) matrix, N points of dimension D
         q_coord : array
             (N,D) matrix, N points of dimension D

         Returns
         -------
         view_reorder : array
             (N,1) matrix of indices into Q, one per atom of P
         """
         elements = np.unique(p_atoms)
         # Filled element by element; same shape as the atom list of Q.
         view_reorder = np.zeros(q_atoms.shape, dtype=int)
         for element in elements:
             p_idx, = np.where(p_atoms == element)
             q_idx, = np.where(q_atoms == element)
             # Order of the atoms of this element by increasing norm
             p_order = np.argsort(np.linalg.norm(p_coord[p_idx], axis=1))
             q_order = np.argsort(np.linalg.norm(q_coord[q_idx], axis=1))
             # Rank of each P atom in that order, projected onto the Q order
             p_rank = np.argsort(p_order)
             view_reorder[p_idx] = q_idx[q_order[p_rank]]
         return view_reorder
     def hungarian(A, B):
         """
         Hungarian reordering.
         Assume A and B are coordinates for atoms of SAME type only.
         Returns, for every row of A, the index of the row of B assigned to it
         when the total euclidean distance is minimised.
         """
         # should be kabasch here i think
         cost = cdist(A, B, 'euclidean')
         # Optimal assignment on the distance matrix between the atoms of the
         # first structure and the trial structure
         _, assigned_b = linear_sum_assignment(cost)
         return assigned_b
     def reorder_hungarian(p_atoms, q_atoms, p_coord, q_coord):
         """
         Compute an index array that re-orders the atoms of Q to match P,
         element by element, using the Hungarian method.

         Parameters
         ----------
         p_atoms : array
             (N,1) matrix holding the atom names of P
         q_atoms : array
             (N,1) matrix holding the atom names of Q
         p_coord : array
             (N,D) matrix, N points of dimension D
         q_coord : array
             (N,D) matrix, N points of dimension D

         Returns
         -------
         view_reorder : array
             (N,1) matrix of indices into Q, one per atom of P; entries that
             were never assigned keep the value -1
         """
         elements = np.unique(p_atoms)
         # Start from -1 everywhere; filled element by element.
         view_reorder = np.full(q_atoms.shape, -1, dtype=int)
         for element in elements:
             p_idx, = np.where(p_atoms == element)
             q_idx, = np.where(q_atoms == element)
             assignment = hungarian(p_coord[p_idx], q_coord[q_idx])
             view_reorder[p_idx] = q_idx[assignment]
         return view_reorder
     def generate_permutations(elements, n):
         """
         Iterative Heap's algorithm producing all n! orderings of `elements`.
         https://en.wikipedia.org/wiki/Heap%27s_algorithm

         The SAME list object is yielded every time and is permuted in place
         between yields, so callers must copy a permutation they want to keep.
         """
         counters = [0] * n
         yield elements
         pos = 0
         while pos < n:
             if counters[pos] >= pos:
                 # This level is exhausted: reset it and move one level up.
                 counters[pos] = 0
                 pos += 1
                 continue
             # Even levels swap with the first slot, odd levels with the slot
             # recorded in the counter.
             partner = 0 if pos % 2 == 0 else counters[pos]
             elements[partner], elements[pos] = elements[pos], elements[partner]
             yield elements
             counters[pos] += 1
             pos = 0
     def brute_permutation(A, B):
         """
         Find the row ordering of B that gives the lowest kabsch_rmsd against A
         by trying every permutation of the rows.

         Parameters
         ----------
         A : array
             (N,D) matrix, where N is points and D is dimension
         B : array
             (N,D) matrix, where N is points and D is dimension

         Returns
         -------
         view : array
             (N,1) matrix, reordered view of B projected to A
         """
         n_rows = A.shape[0]
         best_rmsd = np.inf
         best_view = None
         # Candidates start from the identity ordering [0, 1, ..., n_rows - 1].
         for candidate in generate_permutations(list(range(n_rows)), n_rows):
             # RMSD between A and B with its rows taken in the candidate order
             candidate_rmsd = kabsch_rmsd(A, B[candidate])
             if candidate_rmsd < best_rmsd:
                 best_rmsd = candidate_rmsd
                 # The generator keeps mutating one list, so keep a snapshot.
                 best_view = list(candidate)
         return best_view
     def reorder_brute(p_atoms, q_atoms, p_coord, q_coord):
         """
         Build an index view that re-orders Q to match P, element by element,
         by testing all row permutations (see brute_permutation()).

         Parameters
         ----------
         p_atoms : array
             (N,1) matrix, where N is points holding the atoms' names
         q_atoms : array
             (N,1) matrix, where N is points holding the atoms' names
         p_coord : array
             (N,D) matrix, where N is points and D is dimension
         q_coord : array
             (N,D) matrix, where N is points and D is dimension

         Returns
         -------
         view_reorder : array
             (N,1) matrix, reordered indexes of atom alignment based on the
             coordinates of the atoms; slots never assigned keep the value -1
         """
         # Start from a view shaped like q, filled with the sentinel -1.
         view_reorder = np.full(q_atoms.shape, -1, dtype=int)
         # Treat every element found in P separately.
         for element in np.unique(p_atoms):
             p_slots, = np.where(p_atoms == element)
             q_slots, = np.where(q_atoms == element)
             best_order = brute_permutation(p_coord[p_slots], q_coord[q_slots])
             # Translate the per-element ordering back to global indices.
             view_reorder[p_slots] = q_slots[best_order]
         return view_reorder
     def check_reflections(p_atoms, q_atoms, p_coord, q_coord,
                           reorder_method=reorder_hungarian,
                           rotation_method=kabsch_rmsd,
                           keep_stereo=False):
         """
         Minimize RMSD using reflection planes for molecule P and Q.

         Every combination of an axis swap (AXIS_SWAPS) and an axis reflection
         (AXIS_REFLECTIONS) is applied to Q; the combination giving the lowest
         RMSD against P is reported.

         Warning: This will affect stereo-chemistry (unless keep_stereo is set)

         Parameters
         ----------
         p_atoms : array
             (N,1) matrix, where N is points holding the atoms' names
         q_atoms : array
             (N,1) matrix, where N is points holding the atoms' names
         p_coord : array
             (N,D) matrix, where N is points and D is dimension
         q_coord : array
             (N,D) matrix, where N is points and D is dimension
         reorder_method : callable or None (optional)
             Called as reorder_method(p_atoms, q_atoms, p_coord, q_coord) and
             expected to return an index view; None disables reordering
         rotation_method : callable or None (optional)
             Called as rotation_method(p_coord, q_coord) and expected to return
             an RMSD; None falls back to the plain rmsd() without rotation
         keep_stereo : bool (optional)
             When True, swap/reflection combinations whose mask product is -1
             are skipped (enantiomers)

         Returns
         -------
         min_rmsd
             Lowest RMSD found
         min_swap
             Entry of AXIS_SWAPS that produced min_rmsd
         min_reflection
             Entry of AXIS_REFLECTIONS that produced min_rmsd
         min_review
             Reorder view that produced min_rmsd (None without reorder_method)

         Raises
         ------
         SystemExit
             With exit status 1 when the atom labels of Q, taken through the
             best view, do not match those of P.
         """
         min_rmsd = np.inf
         min_swap = None
         min_reflection = None
         min_review = None
         tmp_review = None
         # One +1/-1 flag per entry of AXIS_SWAPS / AXIS_REFLECTIONS.
         # NOTE(review): presumably the parity of each transformation, in the
         # same order as those module-level tables -- confirm against them.
         swap_mask = [1,-1,-1,1,-1,1]
         reflection_mask = [1,-1,-1,-1,1,1,1,-1]
         for swap, i in zip(AXIS_SWAPS, swap_mask):
             for reflection, j in zip(AXIS_REFLECTIONS, reflection_mask):
                 if keep_stereo and i * j == -1:
                     continue  # skip enantiomers
                 # Work on copies so the caller's arrays are left untouched.
                 tmp_atoms = copy.copy(q_atoms)
                 tmp_coord = copy.deepcopy(q_coord)
                 tmp_coord = tmp_coord[:, swap]
                 tmp_coord = np.dot(tmp_coord, np.diag(reflection))
                 tmp_coord -= centroid(tmp_coord)
                 # Reorder
                 if reorder_method is not None:
                     tmp_review = reorder_method(p_atoms, tmp_atoms, p_coord, tmp_coord)
                     tmp_coord = tmp_coord[tmp_review]
                     tmp_atoms = tmp_atoms[tmp_review]
                 # Rotation
                 if rotation_method is None:
                     this_rmsd = rmsd(p_coord, tmp_coord)
                 else:
                     this_rmsd = rotation_method(p_coord, tmp_coord)
                 if this_rmsd < min_rmsd:
                     min_rmsd = this_rmsd
                     min_swap = swap
                     min_reflection = reflection
                     min_review = tmp_review
         if not (p_atoms == q_atoms[min_review]).all():
             print("error: Not aligned")
             # Report failure through the exit status. The previous quit() is
             # an interactive-shell helper from the site module and exited
             # with status 0.
             raise SystemExit(1)
         return min_rmsd, min_swap, min_reflection, min_review
     def set_coordinates(atoms, V, title="", decimals=8):
         """
         Build the XYZ-format text for coordinates V and their atoms.

         Nothing is printed; the text is returned.

         Parameters
         ----------
         atoms : list
             List of atomic types
         V : array
             (N,3) matrix of atomic coordinates
         title : string (optional)
             Title of molecule
         decimals : int (optional)
             number of decimals for the coordinates

         Return
         ------
         output : str
             Molecule in XYZ format
         """
         n_rows, _ = V.shape
         # Two-character label followed by three 15-wide fixed-point columns
         coord_spec = " {:15." + str(decimals) + "f}"
         line_template = "{:2s}" + coord_spec * 3
         # Header: atom count, then the title line
         lines = [str(n_rows), title]
         for row in range(n_rows):
             label = atoms[row]
             # Upper-case only the first character of the label
             label = label[0].upper() + label[1:]
             lines.append(line_template.format(label, V[row, 0], V[row, 1], V[row, 2]))
         return "\n".join(lines)
     def print_coordinates(atoms, V, title=""):
         """
         Write coordinates V with their atoms to stdout in XYZ format.

         The text is produced by set_coordinates() with its default number of
         decimals.

         Parameters
         ----------
         atoms : list
             List of element types
         V : array
             (N,3) matrix of atomic coordinates
         title : string (optional)
             Title of molecule
         """
         xyz_text = set_coordinates(atoms, V, title=title)
         print(xyz_text)
     def get_coordinates(filename, fmt):
         """
         Read a structure from `filename`, choosing the reader from `fmt`.
         XYZ and PDB are supported; any other format terminates the program
         with an error message.

         Parameters
         ----------
         filename : string
             Filename to read
         fmt : string
             Format of filename. Either xyz or pdb.

         Returns
         -------
         atoms : list
             List of atomic types
         V : array
             (N,3) where N is number of atoms
         """
         # Hand over to the matching reader as soon as the format is known.
         if fmt == "xyz":
             return get_coordinates_xyz(filename)
         if fmt == "pdb":
             return get_coordinates_pdb(filename)
         exit("Could not recognize file format: {:s}".format(fmt))
     def get_coordinates_pdb(filename):
         """
         Get the coordinates of the ATOM records of a pdb file, reading up to
         the first line starting with TER or END, and return a vectorset with
         all the coordinates.

         Parameters
         ----------
         filename : string
             Filename to read

         Returns
         -------
         atoms : array
             Atomic types, one single-letter symbol per ATOM record; only
             H, C, N, O, S and P are recognized
         V : array
             (N,3) where N is number of atoms

         Raises
         ------
         SystemExit
             With an error message when an atom type or a coordinate triple
             cannot be parsed.
         """
         # PDB files tend to be a bit of a mess. The x, y and z coordinates
         # are supposed to be in column 31-38, 39-46 and 47-54, but this is
         # not always the case.
         # Because of this the three first columns containing a decimal is used.
         # Since the format doesn't require a space between columns, we use the
         # above column indices as a fallback.
         x_column = None
         V = list()
         # Same with atoms and atom naming.
         # The most robust way to do this is probably
         # to assume that the atomtype is given in column 3.
         atoms = list()
         with open(filename, 'r') as f:
             for line in f:
                 if line.startswith("TER") or line.startswith("END"):
                     break
                 if not line.startswith("ATOM"):
                     continue
                 tokens = line.split()
                 # Try to get the atomtype
                 try:
                     atom = tokens[2][0]
                     if atom not in ("H", "C", "N", "O", "S", "P"):
                         # e.g. 1HD1
                         atom = tokens[2][1]
                         if atom != "H":
                             raise ValueError("unrecognized atom name")
                     atoms.append(atom)
                 except Exception:
                     # "except Exception" (not a bare except) so Ctrl-C and
                     # SystemExit are not reported as parse errors.
                     raise SystemExit("error: Parsing atomtype for the following line: \n{0:s}".format(line))
                 if x_column is None:
                     try:
                         # look for x column: first of three consecutive
                         # tokens that all contain a decimal point
                         for i, x in enumerate(tokens):
                             if "." in x and "." in tokens[i + 1] and "." in tokens[i + 2]:
                                 x_column = i
                                 break
                     except IndexError:
                         raise SystemExit("error: Parsing coordinates for the following line: \n{0:s}".format(line))
                 # Try to read the coordinates
                 try:
                     # If no x column was found, x_column is still None and the
                     # slice arithmetic raises, which selects the fallback.
                     V.append(np.asarray(tokens[x_column:x_column + 3], dtype=float))
                 except Exception:
                     # If that doesn't work, use hardcoded indices
                     try:
                         x = line[30:38]
                         y = line[38:46]
                         z = line[46:54]
                         V.append(np.asarray([x, y, z], dtype=float))
                     except Exception:
                         raise SystemExit("error: Parsing input for the following line: \n{0:s}".format(line))
         V = np.asarray(V)
         atoms = np.asarray(atoms)
         assert V.shape[0] == atoms.size
         return atoms, V
     def get_coordinates_xyz(filename):
         """
         Get coordinates from filename and return a vectorset with all the
         coordinates, in XYZ format.

         The first line gives the number of atoms, the second line (title) is
         skipped, and at most that many atom lines are read. On each atom line
         the first alphabetic run is taken as the element symbol (upper-cased)
         and the first three decimal numbers as the coordinates.

         Parameters
         ----------
         filename : string
             Filename to read

         Returns
         -------
         atoms : array
             Upper-cased atomic types
         V : array
             (N,3) where N is number of atoms

         Raises
         ------
         SystemExit
             With an error message when the atom count cannot be read, or when
             an atom line lacks an element symbol or three decimal numbers.
         """
         V = list()
         atoms = list()
         # The context manager closes the file on the error exits as well.
         with open(filename, 'r') as f:
             # Read the first line to obtain the number of atoms to read
             try:
                 n_atoms = int(f.readline())
             except ValueError:
                 raise SystemExit("error: Could not obtain the number of atoms in the .xyz file.")
             # Skip the title line
             f.readline()
             # Use the number of atoms to not read beyond the end of a file
             for lines_read, line in enumerate(f):
                 if lines_read == n_atoms:
                     break
                 labels = re.findall(r'[a-zA-Z]+', line)
                 numbers = re.findall(r'[-]?\d+\.\d*(?:[Ee][-\+]\d+)?', line)
                 numbers = [float(number) for number in numbers]
                 # A valid line has an element symbol and at least three
                 # numbers; only the first three numbers are kept.
                 if labels and len(numbers) >= 3:
                     V.append(np.array(numbers)[:3])
                     atoms.append(labels[0].upper())
                 else:
                     raise SystemExit("Reading the .xyz file failed in line {0}. Please check the format.".format(lines_read + 2))
         atoms = np.array(atoms)
         V = np.array(V)
         return atoms, V
     def main():
         import argparse
         import sys
         description = __doc__
         version_msg = """
     rmsd {}
     See https://github.com/charnley/rmsd for citation information
     """
         version_msg = version_msg.format(__version__)
         epilog = """
     """
         parser = argparse.ArgumentParser(
             usage='calculate_rmsd [options] FILE_A FILE_B',
             description=description,
             formatter_class=argparse.RawDescriptionHelpFormatter,
             epilog=epilog)
         # Input structures
         parser.add_argument('structure_a', metavar='FILE_A', type=str, help='structures in .xyz or .pdb format')
         parser.add_argument('structure_b', metavar='FILE_B', type=str)
         # Admin
         parser.add_argument('-v', '--version', action='version', version=version_msg)
         # Rotation
         parser.add_argument('-r', '--rotation', action='store', default="kabsch", help='select rotation method. "kabsch" (default), "quaternion" or "none"', metavar="METHOD")
         # Reorder arguments
         parser.add_argument('-e', '--reorder', action='store_true', help='align the atoms of molecules (default: Hungarian)')
         parser.add_argument('--reorder-method', action='store', default="hungarian", metavar="METHOD", help='select which reorder method to use; hungarian (default), brute, distance')
         parser.add_argument('--use-reflections', action='store_true', help='scan through reflections in planes (eg Y transformed to -Y -> X, -Y, Z) and axis changes, (eg X and Z coords exchanged -> Z, Y, X). This will affect stereo-chemistry.')
         parser.add_argument('--use-reflections-keep-stereo', action='store_true', help='scan through reflections in planes (eg Y transformed to -Y -> X, -Y, Z) and axis changes, (eg X and Z coords exchanged -> Z, Y, X). Stereo-chemistry will be kept.')
         # Filter
         index_group = parser.add_mutually_exclusive_group()
         index_group.add_argument('-nh', '--no-hydrogen', action='store_true', help='ignore hydrogens when calculating RMSD')
         index_group.add_argument('--remove-idx', nargs='+', type=int, help='index list of atoms NOT to consider', metavar='IDX')
         index_group.add_argument('--add-idx', nargs='+', type=int, help='index list of atoms to consider', metavar='IDX')
         # format and print
         parser.add_argument('--format', action='store', help='format of input files. valid format are xyz and pdb', metavar='FMT')
         parser.add_argument('-p', '--output', '--print', action='store_true', help='print out structure B, centered and rotated unto structure A\'s coordinates in XYZ format')
         if len(sys.argv) == 1:
             parser.print_help()
             sys.exit(1)
         args = parser.parse_args()
         # As default, load the extension as format
         if args.format is None:
             args.format = args.structure_a.split('.')[-1]
         p_all_atoms, p_all = get_coordinates(args.structure_a, args.format)
         q_all_atoms, q_all = get_coordinates(args.structure_b, args.format)
         p_size = p_all.shape[0]
         q_size = q_all.shape[0]
         if not p_size == q_size:
             print("error: Structures not same size")
             quit()
         if np.count_nonzero(p_all_atoms != q_all_atoms) and not args.reorder:
             msg = """
     error: Atoms are not in the same order.
     Use --reorder to align the atoms (can be expensive for large structures).
     Please see --help or documentation for more information or
     https://github.com/charnley/rmsd for further examples.
     """
             print(msg)
             exit()
         # Set local view
         p_view = None
         q_view = None
         if args.no_hydrogen:
             p_view = np.where(p_all_atoms != 'H')
             q_view = np.where(q_all_atoms != 'H')
         elif args.remove_idx:
             index = range(p_size)
             index = set(index) - set(args.remove_idx)
             index = list(index)
             p_view = index
             q_view = index
         elif args.add_idx:
             p_view = args.add_idx
             q_view = args.add_idx
         # Set local view
         if p_view is None:
             p_coord = copy.deepcopy(p_all)
             q_coord = copy.deepcopy(q_all)
             p_atoms = copy.deepcopy(p_all_atoms)
             q_atoms = copy.deepcopy(q_all_atoms)
         else:
             if args.reorder and args.output:
                 print("error: Cannot reorder atoms and print structure, when excluding atoms (such as --no-hydrogen)")
                 quit()
             if args.use_reflections and args.output:
                 print("error: Cannot use reflections on atoms and print, when excluding atoms (such as --no-hydrogen)")
                 quit()
             p_coord = copy.deepcopy(p_all[p_view])
             q_coord = copy.deepcopy(q_all[q_view])
             p_atoms = copy.deepcopy(p_all_atoms[p_view])
             q_atoms = copy.deepcopy(q_all_atoms[q_view])

... Ce différentiel a été tronqué car il excède la taille maximale pouvant être affichée.

Formats disponibles : Unified diff

Chimie Théorique » scripts_chimie4psmn » DockOnSurf

Révision 86112fec