Révision 4e82c425

b/dockonsurf.py
21 21

  
22 22
with daemon.DaemonContext(working_directory=os.getcwd(), umask=0o002,
23 23
                          files_preserve=[logger.handlers[0].stream.fileno()]):
24
    # From here on, the execution is carried out by a separate process in
25
    # background
24 26
    logger.info(f'DockOnSurf started on {os.getcwd()}.')
25 27
    logger.info(f'To kill DockOnSurf execution type: `$ kill {os.getpid()}`.')
26 28
    logger.info(f"Using '{args.input}' as input.")
b/modules/calculation.py
1
"""Module for the submission or execution of the calculations to be done
2

  
3
functions:
4
check_finished_calcs: Checks if the calculations finished normally or not.
5
prep_cp2k: Prepares the directories to run calculations with CP2K.
6
prep_vasp: Prepares the directories to run calculations with VASP.
7
get_jobs_status: Returns a list of job status for a list of job ids.
8
submit_jobs: Submits jobs to a custom queuing system with the provided script
9
run_calc: Directs calculation run/submission.
10
"""
11

  
1 12
import os
2 13
import logging
3 14

  
......
5 16

  
6 17

  
7 18
def check_finished_calcs(run_type, code):
8
    from modules.utilities import _human_key
9
    """Returns two lists of calculations finished normally and abnormally.
19
    """Checks if the calculations finished normally or not.
10 20

  
11 21
    @param run_type: The type of calculation to check.
12 22
    @param code: The code used for the specified job.
13
    @return finished_calcs: List of calculation directories that have finished 
23
    @return finished_calcs: List of calculation directories that have finished
14 24
    normally.
15
    @return unfinished_calcs: List of calculation directories that have finished 
25
    @return unfinished_calcs: List of calculation directories that have finished
16 26
    abnormally.
17 27
    """
28
    from modules.utilities import _human_key
18 29
    from glob import glob
19 30
    import ase.io
20 31
    from modules.utilities import tail, is_binary
......
182 193
            incar = Incar.from_file(inp_file)
183 194
            incar["SYSTEM"] = proj_name+"_"+run_type
184 195

  
196
    # Builds the directory hierarchy and copies/creates the relevant files
185 197
    for c, conf in enumerate(atms_list):
186 198
        subdir = f'{run_type}/conf_{c}/'
187 199
        os.mkdir(subdir)
......
297 309

  
298 310

  
299 311
def run_calc(run_type, inp_vars, atms_list):
300
    """Directs the calculation run according to the provided arguments.
312
    """Directs the calculation run/submission.
301 313

  
302 314
    @param run_type: Type of calculation. 'isolated', 'screening' or
303 315
    'refinement'
......
353 365
        submit_jobs(run_type, 'ccc_msub -r %s %s', inp_vars['subm_script'],
354 366
                    stat_cmd, stat_dict, inp_vars['max_jobs'],
355 367
                    inp_vars['project_name'])
356

  
357 368
    elif inp_vars['batch_q_sys'] == 'local':
358 369
        pass  # TODO implement local
359 370
    elif not inp_vars['batch_q_sys']:
b/modules/clustering.py
1 1
"""Functions to cluster structures.
2 2

  
3 3
functions:
4
get_rmsd: Computes the rmsd matrix of the conformers in a list of rdkit mol
5
    objects.
4 6
get_labels_affty: Clusters data in affinity matrix form by assigning labels to
5
data points.
7
    data points.
6 8
get_labels_vector: Clusters data in vectorial form by assigning labels to
7
data points.
9
    data points.
8 10
get_clusters: Groups data-points belonging to the same cluster into arrays of
9
indices.
11
    indices.
10 12
get_exemplars_affty: Computes the exemplars for every cluster and returns a list
11
of indices.
13
    of indices.
12 14
plot_clusters: Plots the clustered data casting a color to every cluster.
13 15
clustering: Directs the clustering process by calling the relevant functions.
14 16
"""
......
21 23

  
22 24

  
23 25
def get_rmsd(mol_list: list, remove_Hs="c"):
24
    """Computes the rmsd matrix of the conformers in a rdkit mol object.
26
    """Computes the rmsd matrix of the conformers in a list of rdkit mol objects
25 27

  
26 28
    @param mol_list: list of rdkit mol objects containing the conformers.
27 29
    @param remove_Hs: bool or str,
......
92 94
    @return: list of cluster labels. Every data point is assigned a number
93 95
    corresponding to the cluster it belongs to.
94 96
    """
97
    # TODO Implement it.
95 98
    return []
96 99

  
97 100

  
......
115 118
    distances between points, RMSD Matrix, etc.) shape: [n_points, n_points].
116 119
    @param clusters: tuple of arrays. Every array contains the indices (relative
117 120
    to the affinity matrix) of the data points belonging to the same cluster.
118
    @return: list of indices (relative to the affinity matrix) exemplars for
119
    every cluster.
121
    @return: list of indices (relative to the affinity matrix) of the exemplars
122
    for every cluster.
123

  
124
    This function finds the exemplars of already clustered data. It does
125
    that by (i) building a rmsd matrix for each existing cluster with the values
126
    of the total RMSD matrix (ii) carrying out an actual clustering for each
127
    cluster-specific matrix using a set of parameters (large negative value of
128
    preference) such that it always finds only one cluster and (iii)
129
    calculating the exemplar for that matrix.
120 130
    """
121 131
    from sklearn.cluster import AffinityPropagation
132
    # Splits Total RMSD matrix into cluster-specific RMSD matrices.
122 133
    clust_affty_mtcs = tuple(affty_mtx[np.ix_(clust, clust)]
123 134
                             for clust in clusters)
124 135
    exemplars = []
136
    # Carries out the forced-to-converge-to-1 clustering for each already
137
    # existing cluster rmsd matrix and calculates the exemplar.
125 138
    for i, mtx in enumerate(clust_affty_mtcs):
126 139
        pref = -1e6 * np.max(np.abs(mtx))
127 140
        af = AffinityPropagation(affinity='precomputed', preference=pref,
b/modules/config_log.py
1
"""Configures the logger to record all calculation events on a log file."""
1
"""Module for the configuration of how and what is recorded in the log file."""
2 2
import sys
3 3
import logging
4 4
import warnings
5 5

  
6 6

  
7 7
def log_exception(exc_type, exc_value, exc_tb):
8
    """Sets up the recording of exceptions on the log file
9

  
10
    @param exc_type: Type of exception
11
    @param exc_value: Value of the exception
12
    @param exc_tb: Traceback of the exception
13
    @return: None
14
    """
8 15
    if issubclass(exc_type, KeyboardInterrupt):
9 16
        sys.__excepthook__(exc_type, exc_value, exc_tb)
10 17
        return
......
13 20

  
14 21

  
15 22
def log_warning(message, *args, **kwargs):
23
    """Sets up the recording of warnings on the log file
24

  
25
    @param message: Warning message.
26
    @param args: Additional arguments.
27
    @param kwargs: Additional keyword arguments.
28
    @return: None
29
    """
16 30
    logger = logging.getLogger('DockOnSurf')
17 31
    logger.warning(" ".join(f"{message}".split()))
18 32

  
19 33

  
20
def config_log(label):  # TODO Format log to break line after column 80.
34
def config_log(label):  # TODO Format log to break long lines (after column 80).
35
    """Configures the logger to record all calculation events on a log file.
36

  
37
    @param label: Label of the logger to be used.
38
    @return: The logger object.
39
    """
21 40
    logger = logging.getLogger(label)
22 41
    logger.setLevel(logging.INFO)
23 42

  
b/modules/dos_input.py
1 1
"""Functions to deal with DockOnSurf input files.
2 2

  
3
Functions
4
try_command:Tries to run a command and logs its exceptions (expected and not).
3
List of functions:
4

  
5
Auxiliary functions
6
-------------------
5 7
str2lst: Converts a string of integers, and groups of them, to a list of lists.
6 8
check_expect_val: Checks whether the value of an option has an adequate value.
7
read_input: Sets up the calculation by reading the parameters from input file.
9
check_inp_files: Checks if the CP2K/VASP input files are consistent.
10

  
11
Functions to read parameters in the Global section
12
--------------------------------------------------
8 13
get_run_type: Gets 'run_type' value and checks that its value is acceptable.
9 14
get_code: Gets 'code' value and checks that its value is acceptable.
10 15
get_batch_q_sys: Gets 'batch_q_sys' value and checks that its value is
11
acceptable.
16
    acceptable.
17
get_pbc_cell: Gets 'pbc_cell' value and checks that its value is acceptable.
18
get_subm_script: Gets 'subm_script' value and checks that its value is
19
    acceptable.
20
get_project_name: Gets 'project_name' value and checks that its value is
21
    acceptable.
12 22
get_relaunch_err: Gets 'relaunch_err' value and checks that its value is
13
acceptable.
23
    acceptable. # WARNING: OPTION NOT IMPLEMENTED
14 24
get_max_jobs: Gets 'max_jobs' value and checks that its value is acceptable.
15 25
get_special_atoms: Gets 'special_atoms' value and checks that its value is
16
acceptable.
26
    acceptable.
27
get_potcar_dir: Gets 'potcar_dir' value and checks that its value is
28
    acceptable.
29

  
30
Functions to read parameters in the Isolated section
31
----------------------------------------------------
17 32
get_isol_inp_file: Gets 'isol_inp_file' value and checks that its value is
18
acceptable.
19
get_cluster_magns: Gets 'cluster_magns' value and checks that its value is
20
acceptable.
33
    acceptable.
34
get_molec_file: Gets 'molec_file' value and checks that its value is acceptable.
21 35
get_num_conformers: Gets 'num_conformers' value and checks that its value is
22
acceptable.
23
get_num_prom_cand: Gets 'num_prom_cand' value and checks that its value is
24
acceptable.
25
get_iso_rmsd: Gets 'iso_rmsd' value and checks that its value is acceptable.
36
    acceptable.
26 37
get_pre_opt: Gets 'pre_opt' value and checks that its value is acceptable.
38

  
39
Functions to read parameters in the Screening section
40
-----------------------------------------------------
27 41
get_screen_inp_file: Gets 'screen_inp_file' value and checks that its value is
28
acceptable.
42
    acceptable.
43
get_surf_file: Gets 'surf_file' value and checks that its value is acceptable.
29 44
get_sites: Gets 'sites' value and checks that its value is acceptable.
30
get_molec_ctrs: Gets 'molec_ctrs' value and checks that its value is
31
acceptable.
32
get_try_disso: Gets 'try_disso' value and checks that its value is acceptable.
45
get_surf_ctrs2: Gets 'surf_ctrs2' value and checks that its value is acceptable.
46
get_molec_ctrs: Gets 'molec_ctrs' value and checks that its value is acceptable.
47
get_molec_ctrs2: Gets 'molec_ctrs2' value and checks that its value is
48
    acceptable.
49
get_molec_ctrs3: Gets 'molec_ctrs3' value and checks that its value is
50
    acceptable.
51
get_max_helic_angle: Gets 'max_helic_angle' value and checks that its value is
52
    acceptable.
53
get_select_magns: Gets 'select_magns' value and checks that its value is
54
    acceptable.
55
get_confs_per_magn: Gets 'confs_per_magn' value and checks that its value is
56
    acceptable.
57
get_surf_norm_vect: Gets 'surf_norm_vect' value and checks that its value is
58
    acceptable.
59
get_adsorption_height: Gets 'adsorption_height' value and checks that its value
60
    is acceptable.
61
get_set_angles: Gets 'set_angles' value and checks that its value is
62
    acceptable.
33 63
get_pts_per_angle: Gets 'pts_per_angle' value and checks that its value is
34
acceptable.
64
    acceptable.
65
get_max_structures: Gets 'max_structures' value and checks that its value is
66
    acceptable.
35 67
get_coll_thrsld: Gets 'coll_thrsld' value and checks that its value is
36
acceptable.
37
get_screen_rmsd: Gets 'screen_rmsd' value and checks that its value is
38
acceptable.
39
get_coll_bottom_z: Gets 'coll_bottom_z' value and checks that its value is
40
acceptable.
68
    acceptable.
69
get_min_coll_height: Gets 'min_coll_height' value and checks that its value is
70
    acceptable.
71
get_exclude_ads_ctr: Gets 'exclude_ads_ctr' value and checks that its value is
72
    acceptable.
73
get_H_donor: Gets 'H_donor' value and checks that its value is
74
    acceptable.
75
get_H_acceptor: Gets 'H_acceptor' value and checks that its value is
76
    acceptable.
77
get_use_molec_file: Gets 'use_molec_file' value and checks that its value is
78
    acceptable.
79

  
80
Functions to read parameters in the Refinement section
81
------------------------------------------------------
41 82
get_refine_inp_file: Gets 'refine_inp_file' value and checks that its value is
42
acceptable.
83
    acceptable.
43 84
get_energy_cutoff: Gets 'energy_cutoff' value and checks that its value is
44
acceptable.
85
    acceptable.
86

  
87
read_input: Directs the reading of the parameters in the input file
45 88
"""
46 89
import os.path
47 90
import logging
......
54 97

  
55 98
dos_inp = ConfigParser(inline_comment_prefixes='#',
56 99
                       empty_lines_in_values=False)
57

  
100
# Define new answers to be interpreted as True or False.
58 101
new_answers = {'n': False, 'none': False, 'nay': False,
59 102
               'y': True, 'sí': True, 'aye': True, 'sure': True}
60 103
for answer, val in new_answers.items():
......
64 107
turn_true_answers = [answer for answer in dos_inp.BOOLEAN_STATES
65 108
                     if dos_inp.BOOLEAN_STATES[answer]]
66 109

  
110
# Template error messages to be customized in place.
67 111
no_sect_err = "Section '%s' not found on input file"
68 112
no_opt_err = "Option '%s' not found on section '%s'"
69 113
num_error = "'%s' value must be a %s"
......
73 117

  
74 118
def str2lst(cmplx_str, func=int):  # TODO: enable deeper level of nested lists
75 119
    # TODO Treat all-enclosing parenthesis as a list instead of list of lists.
76
    """Converts a string of integers, and groups of them, to a list.
120
    """Converts a string of integers/floats, and groups of them, to a list.
77 121

  
78 122
    Keyword arguments:
79 123
    @param cmplx_str: str, string of integers (or floats) and groups of them
......
159 203
    return True
160 204

  
161 205

  
162
def check_inp_files(inp_files, code, potcar_dir=None):
206
def check_inp_files(inp_files, code: str, potcar_dir=None):
207
    """Checks if the CP2K/VASP input files are consistent.
208

  
209
    @param inp_files: List of input files
210
    @param code: The code the input files are meant for (VASP or CP2K).
211
    @param potcar_dir: The path where POTCARs are found
212
    @return: None
213
    """
163 214
    if code == 'cp2k':
164 215
        from pycp2k import CP2K
165 216
        if not isinstance(inp_files, str):
......
184 235
                               "also enabled via the 'potcar_dir' keyword. The "
185 236
                               "POTCAR specified as input_file will be used "
186 237
                               "instead of the auto-generated one.")
187
        # Check that it inp_files is a list of file paths
238
        # Check that inp_files is a list of file paths
188 239
        if not isinstance(inp_files, list) and all(isinstance(inp_file, str)
189 240
                                                   for inp_file in inp_files):
190 241
            err_msg = "'inp_files' should be a list of file names/paths"
......
306 357

  
307 358

  
308 359
def get_relaunch_err():
360
    # WARNING: OPTION NOT IMPLEMENTED
309 361
    relaunch_err_vals = ['geo_not_conv']
310 362
    relaunch_err = dos_inp.get('Global', 'relaunch_err',
311 363
                               fallback="False")
......
815 867
# Read input parameters
816 868

  
817 869
def read_input(in_file):
870
    """Directs the reading of the parameters in the input file.
871

  
872
    @param in_file: The path to the DockOnSurf input file.
873
    @return inp_vars: Dictionary with the values for every option in the input
874
    file.
875
    """
818 876
    from modules.formats import adapt_format
819 877

  
878
    # Checks for errors in the Input file.
820 879
    err_msg = False
821 880
    try:
822 881
        dos_inp.read(in_file)
......
851 910
            logger.error(no_opt_err % (opt, 'Global'))
852 911
            raise NoOptionError(opt, 'Global')
853 912

  
854
    # Gets which sections are to be carried out
913
    # Mandatory options
855 914
    isolated, screening, refinement = get_run_type()
856 915
    inp_vars['isolated'] = isolated
857 916
    inp_vars['screening'] = screening
......
882 941
            logger.error(no_sect_err % 'Isolated')
883 942
            raise NoSectionError('Isolated')
884 943
        # Mandatory options
885
        # Checks whether the mandatory options are present.
886 944
        iso_mand_opts = ['isol_inp_file', 'molec_file']
887 945
        for opt in iso_mand_opts:
888 946
            if not dos_inp.has_option('Isolated', opt):
......
929 987
            raise NoSectionError('Screening')
930 988
        # Mandatory options:
931 989
        # Checks whether the mandatory options are present.
990
        # Mandatory options
932 991
        screen_mand_opts = ['screen_inp_file', 'surf_file', 'sites',
933 992
                            'molec_ctrs']
934 993
        for opt in screen_mand_opts:
......
946 1005
        inp_vars['molec_ctrs'] = get_molec_ctrs()
947 1006

  
948 1007
        # Checks for PBC
949
        # Checks for PBC
950 1008
        atms = adapt_format('ase', inp_vars['surf_file'],
951 1009
                            inp_vars['special_atoms'])
952 1010
        if inp_vars['code'] == 'vasp' and np.linalg.det(atms.cell) == 0.0 \
b/modules/formats.py
1
"""Module for the interconversion between different kinds of atomic data.
1
"""Module for the conversion between different kinds of atomic data.
2 2

  
3 3
functions:
4 4
confs_to_mol_list: Converts the conformers inside a rdkit.Mol object to a list
......
9 9
adapt_format: Converts the coordinate files into a required library object type.
10 10
read_coords_cp2k: Reads the coordinates from a CP2K restart file and returns an
11 11
    ase.Atoms object.
12
collect_coords: Directs the reading of coordinates on a set of subdirectories.
12
read_coords_vasp: Reads the coordinates from VASP OUTCAR file and returns an
13
    ase.Atoms object.
13 14
read_energy_cp2k: Reads the CP2K out file and returns its final energy.
14
collect_energies: Directs the reading of energies on a set of subdirectories.
15
collect_confs: Reads the coordinates and energies of a list of finished
16
    calculations.
15 17
"""
16 18

  
17 19
import logging
......
89 91
    Depending on the library required to use and the file type, it converts the
90 92
    coordinate file into a library-workable object.
91 93
    @param requirement: str, the library for which the conversion should be
92
    made. Accepted values: 'ase', 'rdkit'.
94
        made. Accepted values: 'ase', 'rdkit'.
93 95
    @param coord_file: str, path to the coordinates file aiming to convert.
94
    Accepted file tyoes: 'xyz', 'mol'.
96
        Accepted file formats: all file formats readable by ase.
95 97
    @param spec_atms: List of tuples containing pairs of new/traditional
96 98
        chemical symbols.
97 99
    @return: an object the required library can work with.
......
234 236
                                        spec_atms)
235 237
            # Assign energy
236 238
            for fil in os.listdir(conf_path):
237
                if is_binary(conf_path+fil):
239
                if is_binary(conf_path + fil):
238 240
                    continue
239
                conf_energy = read_energy_cp2k(conf_path+fil)
241
                conf_energy = read_energy_cp2k(conf_path + fil)
240 242
                if conf_energy is not None:
241 243
                    ase_atms.info["energy"] = conf_energy
242 244
                    break
......
252 254
            logger.error(err_msg)
253 255
            raise NotImplementedError(err_msg)
254 256
    return atoms_list
255

  
b/modules/isolated.py
3 3
functions:
4 4
remove_C_linked_Hs: Removes hydrogens bonded to a carbon atom from a molecule.
5 5
gen_confs: Generate a number of conformers in random orientations.
6
get_rmsd: Gets the rmsd matrix of the conformers in a rdkit mol object.
7 6
get_moments_of_inertia: Computes moments of inertia of the given conformers.
8
mmff_opt_confs: Optimizes the geometry of the given conformers and returns the
7
get_moments_of_inertia: Computes the moments of inertia of the given conformers.
8
pre_opt_confs: Optimizes the geometry of the given conformers and returns the
9 9
    new mol object and the energies of its conformers.
10 10
run_isolated: directs the execution of functions to achieve the goal
11 11
"""
......
64 64

  
65 65

  
66 66
def get_moments_of_inertia(mol: Chem.rdchem.Mol):
67
    """Computes the moments of inertia of the given conformers
67
    """Computes the moments of inertia of the given conformers.
68 68

  
69 69
    @param mol: rdkit mol object of the relevant molecule.
70 70
    @return numpy array 2D: The inner array contains the moments of inertia for
......
144 144
    from modules.refinement import select_stable_confs
145 145

  
146 146
    logger.info('Carrying out procedures for the isolated molecule.')
147
    # Read the molecule
147 148
    rd_mol = adapt_format('rdkit', inp_vars['molec_file'],
148 149
                          inp_vars['special_atoms'])
150
    # Generate conformers
149 151
    confs = gen_confs(rd_mol, inp_vars['num_conformers'])
152
    # Pre-optimizes conformers
150 153
    if inp_vars['pre_opt']:
151 154
        confs, confs_ener = pre_opt_confs(confs, inp_vars['pre_opt'])
152 155
    else:
153 156
        confs_ener = pre_opt_confs(confs, max_iters=0)
154 157
    conf_list = confs_to_mol_list(confs)
158
    # Calculates RMSD matrix of the conformers
155 159
    rmsd_mtx = get_rmsd(conf_list)
156 160
    confs_moi = get_moments_of_inertia(confs)
161
    # Clusters the conformers and selects a representative
157 162
    exemplars = clustering(rmsd_mtx)
158 163
    mol_list = confs_to_mol_list(confs, exemplars)
159 164
    ase_atms_list = [rdkit_mol_to_ase_atoms(mol) for mol in mol_list]
......
162 167
                  "dockonsurf.inp"
163 168
        logger.error(err_msg)
164 169
        raise ValueError(err_msg)
170
    # Runs the jobs.
165 171
    run_calc('isolated', inp_vars, ase_atms_list)
166 172
    logger.info("Finished the procedures for the isolated molecule section. ")
167 173
    if inp_vars["batch_q_sys"]:
b/modules/utilities.py
4 4

  
5 5

  
6 6
def tail(f, lines=20):
7
    """Returns the specified last number of lines of a file.
8

  
9
    @param f: The file to retrieve the last lines from.
10
    @param lines: The number of lines to be retrieved.
11
    @return str: The requested number of last lines of the file.
12
    """
7 13
    total_lines_wanted = lines
8 14

  
9 15
    block_size = 1024
......
29 35

  
30 36
def check_bak(file_name):
31 37
    """Checks if a file already exists and backs it up if so.
38

  
32 39
    @param file_name: file to be checked if exists
33 40
    """
34 41
    import os
......
83 90

  
84 91

  
85 92
def _human_key(key):
93
    """Function used as sorting strategy where numbers are sorted human-wise.
94

  
95
    @param key: str, the string to be converted into a sorting key.
96
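    @return: tuple in which the text parts of the key are case-swapped and
        the numeric parts are converted to floats.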
97
    """
86 98
    import re
87 99
    parts = re.split('(\d*\.\d+|\d+)', key)
88 100
    return tuple((e.swapcase() if i % 2 == 0 else float(e))
......
90 102

  
91 103

  
92 104
def is_binary(file):
105
    """Checks if a file is a text file or a binary one.
106

  
107
    @param file: Path of the file to check.
108
    @return: bool, True if the file is binary, False if it is a text file.
109
    """
93 110
    try:
94 111
        with open(file, "r") as fh:
95 112
            fh.read(50)

Formats disponibles : Unified diff