Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / dos_input.py @ cf980c86

Historique | Voir | Annoter | Télécharger (43,75 ko)

1
"""Functions to deal with DockOnSurf input files.
2

3
List of functions:
4

5
Auxiliary functions
6
-------------------
7
str2lst: Converts a string of integers, and groups of them, to a list of lists.
8
check_expect_val: Checks whether the value of an option has an adequate value.
9
check_inp_files: Checks if the CP2K/VASP input files are consistent.
10

11
Functions to read parameters in the Global section
12
--------------------------------------------------
13
get_run_type: Gets 'run_type' value and checks that its value is acceptable.
14
get_code: Gets 'code' value and checks that its value is acceptable.
15
get_batch_q_sys: Gets 'batch_q_sys' value and checks that its value is
16
    acceptable.
17
get_pbc_cell: Gets 'pbc_cell' value and checks that its value is acceptable.
18
get_subm_script: Gets 'subm_script' value and checks that its value is
19
    acceptable.
20
get_project_name: Gets 'project_name' value and checks that its value is
21
    acceptable.
22
get_relaunch_err: Gets 'relaunch_err' value and checks that its value is
23
    acceptable. # WARNING: OPTION NOT IMPLEMENTED
24
get_max_jobs: Gets 'max_jobs' value and checks that its value is acceptable.
25
get_special_atoms: Gets 'special_atoms' value and checks that its value is
26
    acceptable.
27
get_potcar_dir: Gets 'potcar_dir' value and checks that its value is
28
    acceptable.
29

30
Functions to read parameters in the Isolated section
31
----------------------------------------------------
32
get_isol_inp_file: Gets 'isol_inp_file' value and checks that its value is
33
    acceptable.
34
get_molec_file: Gets 'molec_file' value and checks that its value is acceptable.
35
get_num_conformers: Gets 'num_conformers' value and checks that its value is
36
    acceptable.
37
get_pre_opt: Gets 'pre_opt' value and checks that its value is acceptable.
38

39
Functions to read parameters in the Screening section
40
-----------------------------------------------------
41
get_screen_inp_file: Gets 'screen_inp_file' value and checks that its value is
42
    acceptable.
43
get_surf_file: Gets 'surf_file' value and checks that its value is acceptable.
44
get_sites: Gets 'sites' value and checks that its value is acceptable.
45
get_surf_ctrs2: Gets 'surf_ctrs2' value and checks that its value is acceptable.
46
get_molec_ctrs: Gets 'molec_ctrs' value and checks that its value is acceptable.
47
get_molec_ctrs2: Gets 'molec_ctrs2' value and checks that its value is
48
    acceptable.
49
get_molec_ctrs3: Gets 'molec_ctrs3' value and checks that its value is
50
    acceptable.
51
get_max_helic_angle: Gets 'max_helic_angle' value and checks that its value is
52
    acceptable.
53
get_select_magns: Gets 'select_magns' value and checks that its value is
54
    acceptable.
55
get_confs_per_magn: Gets 'confs_per_magn' value and checks that its value is
56
    acceptable.
57
get_surf_norm_vect: Gets 'surf_norm_vect' value and checks that its value is
58
    acceptable.
59
get_adsorption_height: Gets 'adsorption_height' value and checks that its value
60
    is acceptable.
61
get_set_angles: Gets 'set_angles' value and checks that its value is
62
    acceptable.
63
get_pts_per_angle: Gets 'pts_per_angle' value and checks that its value is
64
    acceptable.
65
get_max_structures: Gets 'max_structures' value and checks that its value is
66
    acceptable.
67
get_coll_thrsld: Gets 'coll_thrsld' value and checks that its value is
68
    acceptable.
69
get_min_coll_height: Gets 'coll_bottom_z' value and checks that its value is
70
    acceptable.
71
get_exclude_ads_ctr: Gets 'exclude_ads_ctr' value and checks that its value is
72
    acceptable.
73
get_H_donor: Gets 'H_donor' value and checks that its value is
74
    acceptable.
75
get_H_acceptor: Gets 'H_acceptor' value and checks that its value is
76
    acceptable.
77
get_use_molec_file: Gets 'use_molec_file' value and checks that its value is
78
    acceptable.
79

80
Functions to read parameters in the Refinement section
81
------------------------------------------------------
82
get_refine_inp_file: Gets 'refine_inp_file' value and checks that its value is
83
    acceptable.
84
get_energy_cutoff: Gets 'energy_cutoff' value and checks that its value is
85
    acceptable.
86

87
read_input: Directs the reading of the parameters in the input file
88
"""
89
import os.path
90
import logging
91
from configparser import ConfigParser, NoSectionError, NoOptionError, \
92
    MissingSectionHeaderError, DuplicateOptionError
93
import numpy as np
94
from modules.utilities import try_command
95

    
96
logger = logging.getLogger('DockOnSurf')
97

    
98
dos_inp = ConfigParser(inline_comment_prefixes='#',
99
                       empty_lines_in_values=False)
100
# Define new answers to be interpreted as True or False.
101
new_answers = {'n': False, 'none': False, 'nay': False,
102
               'y': True, '': True, 'aye': True, 'sure': True}
103
for answer, val in new_answers.items():
104
    dos_inp.BOOLEAN_STATES[answer] = val  # TODO Check value 0
105
turn_false_answers = [answer for answer in dos_inp.BOOLEAN_STATES
106
                      if dos_inp.BOOLEAN_STATES[answer] is False]
107
turn_true_answers = [answer for answer in dos_inp.BOOLEAN_STATES
108
                     if dos_inp.BOOLEAN_STATES[answer]]
109

    
110
# Template error messages to be customized in place.
111
no_sect_err = "Section '%s' not found on input file"
112
no_opt_err = "Option '%s' not found on section '%s'"
113
num_error = "'%s' value must be a %s"
114

    
115

    
116
# Auxilary functions
117

    
118
def str2lst(cmplx_str, func=int):  # TODO: enable deeper level of nested lists
119
    # TODO Treat all-enclosing parenthesis as a list instead of list of lists.
120
    """Converts a string of integers/floats, and groups of them, to a list.
121

122
    Keyword arguments:
123
    @param cmplx_str: str, string of integers (or floats) and groups of them
124
    enclosed by parentheses-like characters.
125
    - Group enclosers: '()' '[]' and '{}'.
126
    - Separators: ',' ';' and ' '.
127
    - Nested groups are not allowed: '3 ((6 7) 8) 4'.
128
    @param func: either to use int or float
129

130
    @return list, list of integers (or floats), or list of integers (or floats)
131
    in the case they were grouped. First, the singlets are placed, and then the
132
    groups in input order.
133

134
    eg. '128,(135 138;141] 87 {45, 68}' -> [128, 87, [135, 138, 141], [45, 68]]
135
    """
136

    
137
    # Checks
138
    error_msg = "Function argument should be a str, sequence of " \
139
                "numbers separated by ',' ';' or ' '." \
140
                "\nThey can be grouped in parentheses-like enclosers: '()', " \
141
                "'[]' or {}. Nested groups are not allowed. \n" \
142
                "eg. 128,(135 138;141) 87 {45, 68}"
143
    cmplx_str = try_command(cmplx_str.replace, [(AttributeError, error_msg)],
144
                            ',', ' ')
145

    
146
    cmplx_str = cmplx_str.replace(';', ' ').replace('[', '(').replace(
147
        ']', ')').replace('{', '(').replace('}', ')')
148

    
149
    try_command(list, [(ValueError, error_msg)], map(func, cmplx_str.replace(
150
        ')', '').replace('(', '').split()))
151

    
152
    deepness = 0
153
    for el in cmplx_str.split():
154
        if '(' in el:
155
            deepness += 1
156
        if ')' in el:
157
            deepness += -1
158
        if deepness > 1 or deepness < 0:
159
            logger.error(error_msg)
160
            raise ValueError(error_msg)
161

    
162
    init_list = cmplx_str.split()
163
    start_group = []
164
    end_group = []
165
    for i, element in enumerate(init_list):
166
        if '(' in element:
167
            start_group.append(i)
168
            init_list[i] = element.replace('(', '')
169
        if ')' in element:
170
            end_group.append(i)
171
            init_list[i] = element.replace(')', '')
172

    
173
    init_list = list(map(func, init_list))
174

    
175
    new_list = []
176
    for start_el, end_el in zip(start_group, end_group):
177
        new_list.append(init_list[start_el:end_el + 1])
178

    
179
    for v in new_list:
180
        for el in v:
181
            init_list.remove(el)
182
    return init_list + new_list
183

    
184

    
185
def check_expect_val(value, expect_vals, err_msg=None):
186
    """Checks whether an option lies within its expected values.
187

188
    Keyword arguments:
189
    @param value: The variable to check if its value lies within the expected
190
    ones
191
    @param expect_vals: list, list of values allowed for the present option.
192
    @param err_msg: The error message to be prompted in both log and screen.
193
    @raise ValueError: if the value is not among the expected ones.
194
    @return True if the value is among the expected ones.
195
    """
196
    if err_msg is None:
197
        err_msg = f"'{value}' is not an adequate value.\n" \
198
                  f"Adequate values: {expect_vals}"
199
    if not any([exp_val == value for exp_val in expect_vals]):
200
        logger.error(err_msg)
201
        raise ValueError(err_msg)
202

    
203
    return True
204

    
205

    
206
def check_inp_files(inp_files, code: str, potcar_dir=None):
207
    """Checks if the CP2K/VASP input files are consistent.
208

209
    @param inp_files: List of input files
210
    @param code: The code for which the input files are for (VASP or CP2K).
211
    @param potcar_dir: The path where POTCARs are found
212
    @return: None
213
    """
214
    if code == 'cp2k':
215
        from pycp2k import CP2K
216
        if not isinstance(inp_files, str):
217
            err_msg = "When using CP2K, only one input file is allowed"
218
            logger.error(err_msg)
219
            ValueError(err_msg)
220
        elif not os.path.isfile(inp_files):
221
            err_msg = f"Input file {inp_files} was not found."
222
            logger.error(err_msg)
223
            raise FileNotFoundError(err_msg)
224
        cp2k = CP2K()
225
        try_command(cp2k.parse,
226
                    [(UnboundLocalError, "Invalid CP2K input file")], inp_files)
227
    elif code == "vasp":
228
        if not potcar_dir:
229
            mand_files = ["INCAR", "KPOINTS", "POTCAR"]
230
        else:
231
            mand_files = ["INCAR", "KPOINTS"]
232
            if any("POTCAR" in inp_file for inp_file in inp_files):
233
                logger.warning("A POTCAR file was specified as input file "
234
                               "while the automatic generation of POTCAR was "
235
                               "also enabled via the 'potcar_dir' keyword. The "
236
                               "POTCAR specified as input_file will be used "
237
                               "instead of the auto-generated one.")
238
        # Check that if inp_files is a list of file paths
239
        if not isinstance(inp_files, list) and all(isinstance(inp_file, str)
240
                                                   for inp_file in inp_files):
241
            err_msg = "'inp_files' should be a list of file names/paths"
242
            logger.error(err_msg)
243
            raise ValueError(err_msg)
244
        # Check that all mandatory files are defined once and just once.
245
        elif [[mand_file in inp_file for inp_file in inp_files].count(True)
246
              for mand_file in mand_files].count(1) != len(mand_files):
247
            err_msg = f"Each of the mandatory files {mand_files} must be " \
248
                      f"defined once and just once."
249
            logger.error(err_msg)
250
            raise FileNotFoundError(err_msg)
251
        # Check that the defined files exist
252
        elif any(not os.path.isfile(inp_file) for inp_file in inp_files):
253
            err_msg = f"At least one of the mandatory files {mand_files} was " \
254
                      "not found."
255
            logger.error(err_msg)
256
            raise FileNotFoundError(err_msg)
257
        # Check that mandatory files are actual vasp files.
258
        else:
259
            from pymatgen.io.vasp.inputs import Incar, Kpoints, Potcar
260
            for inp_file in inp_files:
261
                file_name = inp_file.split("/")[-1]
262
                if not any(mand_file in file_name for mand_file in mand_files):
263
                    continue
264
                file_type = ""
265
                for mand_file in mand_files:
266
                    if mand_file in inp_file:
267
                        file_type = mand_file
268
                err = False
269
                err_msg = f"'{inp_file}' is not a valid {file_name} file."
270
                try:
271
                    eval(file_type.capitalize()).from_file(inp_file)
272
                except ValueError:
273
                    logger.error(err_msg)
274
                    err = ValueError(err_msg)
275
                except IndexError:
276
                    logger.error(err_msg)
277
                    err = IndexError(err_msg)
278
                else:
279
                    if file_name == "INCAR":
280
                        Incar.from_file("INCAR").check_params()
281
                finally:
282
                    if isinstance(err, BaseException):
283
                        raise err
284

    
285

    
286
# Global
287

    
288
def get_run_type():
289
    isolated, screening, refinement = (False, False, False)
290
    run_type_vals = ['isolated', 'screening', 'refinement', 'adsorption',
291
                     'full']
292
    run_types = dos_inp.get('Global', 'run_type').split()
293
    for run_type in run_types:
294
        check_expect_val(run_type.lower(), run_type_vals)
295
        if 'isol' in run_type.lower():
296
            isolated = True
297
        if 'screen' in run_type.lower():
298
            screening = True
299
        if 'refine' in run_type.lower():
300
            refinement = True
301
        if 'adsor' in run_type.lower():
302
            screening, refinement = (True, True)
303
        if 'full' in run_type.lower():
304
            isolated, screening, refinement = (True, True, True)
305

    
306
    return isolated, screening, refinement
307

    
308

    
309
def get_code():
310
    code_vals = ['cp2k', 'vasp']
311
    check_expect_val(dos_inp.get('Global', 'code').lower(), code_vals)
312
    code = dos_inp.get('Global', 'code').lower()
313
    return code
314

    
315

    
316
def get_batch_q_sys():
317
    batch_q_sys_vals = ['sge', 'lsf', 'irene', 'local'] + turn_false_answers
318
    check_expect_val(dos_inp.get('Global', 'batch_q_sys').lower(),
319
                     batch_q_sys_vals)
320
    batch_q_sys = dos_inp.get('Global', 'batch_q_sys').lower()
321
    if batch_q_sys.lower() in turn_false_answers:
322
        return False
323
    else:
324
        return batch_q_sys
325

    
326

    
327
def get_pbc_cell():
328
    from ase.atoms import Cell
329
    err_msg = "'pbc_cell' must be either 3 vectors of size 3 or False."
330
    pbc_cell_str = dos_inp.get('Global', 'pbc_cell', fallback="False")
331
    if pbc_cell_str.lower() in turn_false_answers:
332
        return False
333
    else:
334
        pbc_cell = np.array(try_command(str2lst, [(ValueError, err_msg)],
335
                                        pbc_cell_str, float))
336
        if pbc_cell.shape != (3, 3):
337
            logger.error(err_msg)
338
            raise ValueError(err_msg)
339
        if np.linalg.det(pbc_cell) == 0.0:
340
            err_msg = "The volume of the defined cell is 0"
341
            logger.error(err_msg)
342
            raise ValueError(err_msg)
343
        return Cell(pbc_cell)
344

    
345

    
346
def get_subm_script():
347
    subm_script = dos_inp.get('Global', 'subm_script')
348
    if not os.path.isfile(subm_script):
349
        logger.error(f'File {subm_script} not found.')
350
        raise FileNotFoundError(f'File {subm_script} not found')
351
    return subm_script
352

    
353

    
354
def get_project_name():
355
    project_name = dos_inp.get('Global', 'project_name', fallback='')
356
    return project_name
357

    
358

    
359
def get_relaunch_err():
360
    # WARNING: OPTION NOT IMPLEMENTED
361
    relaunch_err_vals = ['geo_not_conv']
362
    relaunch_err = dos_inp.get('Global', 'relaunch_err',
363
                               fallback="False")
364
    if relaunch_err.lower() in turn_false_answers:
365
        return False
366
    else:
367
        check_expect_val(relaunch_err.lower(), relaunch_err_vals)
368
    return relaunch_err
369

    
370

    
371
def get_max_jobs():
372
    import re
373
    err_msg = "'max_jobs' must be a list of, number plus 'p', 'q' or 'r', or " \
374
              "a combination of them without repeating letters.\n" \
375
              "eg: '2r 3p 4pr', '5q' or '3r 3p'"
376
    max_jobs_str = dos_inp.get('Global', 'max_jobs', fallback="inf").lower()
377
    str_vals = ["r", "p", "q", "rp", "rq", "pr", "qr"]
378
    max_jobs = {"r": np.inf, "p": np.inf, "rp": np.inf}
379
    if "inf" == max_jobs_str:
380
        return {"r": np.inf, "p": np.inf, "rp": np.inf}
381
    # Iterate over the number of requirements:
382
    for req in max_jobs_str.split():
383
        # Split numbers from letters into a list
384
        req_parts = re.findall(r'[a-z]+|\d+', req)
385
        if len(req_parts) != 2:
386
            logger.error(err_msg)
387
            raise ValueError(err_msg)
388
        if req_parts[0].isdecimal():
389
            req_parts[1] = req_parts[1].replace('q', 'p').replace('pr', 'rp')
390
            if req_parts[1] in str_vals and max_jobs[req_parts[1]] == np.inf:
391
                max_jobs[req_parts[1]] = int(req_parts[0])
392
        elif req_parts[1].isdecimal():
393
            req_parts[0] = req_parts[0].replace('q', 'p').replace('pr', 'rp')
394
            if req_parts[0] in str_vals and max_jobs[req_parts[0]] == np.inf:
395
                max_jobs[req_parts[0]] = int(req_parts[1])
396
        else:
397
            logger.error(err_msg)
398
            raise ValueError(err_msg)
399

    
400
    return max_jobs
401

    
402

    
403
def get_special_atoms():
404
    from ase.data import chemical_symbols
405

    
406
    spec_at_err = '\'special_atoms\' does not have an adequate format.\n' \
407
                  'Adequate format: (Fe1 Fe) (O1 O)'
408
    special_atoms = dos_inp.get('Global', 'special_atoms', fallback="False")
409
    if special_atoms.lower() in turn_false_answers:
410
        special_atoms = []
411
    else:
412
        # Converts the string into a list of tuples
413
        lst_tple = [tuple(pair.replace("(", "").split()) for pair in
414
                    special_atoms.split(")")[:-1]]
415
        if len(lst_tple) == 0:
416
            logger.error(spec_at_err)
417
            raise ValueError(spec_at_err)
418
        for i, tup in enumerate(lst_tple):
419
            if not isinstance(tup, tuple) or len(tup) != 2:
420
                logger.error(spec_at_err)
421
                raise ValueError(spec_at_err)
422
            if tup[1].capitalize() not in chemical_symbols:
423
                elem_err = "The second element of the couple should be an " \
424
                           "actual element of the periodic table"
425
                logger.error(elem_err)
426
                raise ValueError(elem_err)
427
            if tup[0].capitalize() in chemical_symbols:
428
                elem_err = "The first element of the couple is already an " \
429
                           "actual element of the periodic table, "
430
                logger.error(elem_err)
431
                raise ValueError(elem_err)
432
            for j, tup2 in enumerate(lst_tple):
433
                if j <= i:
434
                    continue
435
                if tup2[0] == tup[0]:
436
                    label_err = f'You have specified the label {tup[0]} to ' \
437
                                f'more than one special atom'
438
                    logger.error(label_err)
439
                    raise ValueError(label_err)
440
        special_atoms = lst_tple
441
    return special_atoms
442

    
443

    
444
def get_potcar_dir():
445
    potcar_dir = dos_inp.get('Global', 'potcar_dir', fallback="False")
446
    if potcar_dir.lower() in turn_false_answers:
447
        return False
448
    elif not os.path.isdir(potcar_dir):
449
        err_msg = "'potcar_dir' must be either False or a directory."
450
        logger.error(err_msg)
451
        raise ValueError(err_msg)
452
    else:
453
        return potcar_dir
454

    
455

    
456
# Isolated
457

    
458
def get_isol_inp_file(code, potcar_dir=None):  # TODO allow spaces in path names
459
    inp_file_lst = dos_inp.get('Isolated', 'isol_inp_file').split()
460
    check_inp_files(inp_file_lst[0] if len(inp_file_lst) == 1 else inp_file_lst,
461
                    code, potcar_dir)
462
    return inp_file_lst[0] if len(inp_file_lst) == 1 else inp_file_lst
463

    
464

    
465
def get_molec_file():
466
    molec_file = dos_inp.get('Isolated', 'molec_file')
467
    if not os.path.isfile(molec_file):
468
        logger.error(f'File {molec_file} not found.')
469
        raise FileNotFoundError(f'File {molec_file} not found')
470
    return molec_file
471

    
472

    
473
def get_num_conformers():
474
    err_msg = num_error % ('num_conformers', 'positive integer')
475
    num_conformers = try_command(dos_inp.getint, [(ValueError, err_msg)],
476
                                 'Isolated', 'num_conformers', fallback=100)
477
    if num_conformers < 1:
478
        logger.error(err_msg)
479
        raise ValueError(err_msg)
480
    return num_conformers
481

    
482

    
483
def get_pre_opt():
484
    pre_opt_vals = ['uff', 'mmff'] + turn_false_answers
485
    check_expect_val(dos_inp.get('Isolated', 'pre_opt').lower(), pre_opt_vals)
486
    pre_opt = dos_inp.get('Isolated', 'pre_opt').lower()
487
    if pre_opt in turn_false_answers:
488
        return False
489
    else:
490
        return pre_opt
491

    
492

    
493
# Screening
494

    
495
def get_screen_inp_file(code,
496
                        potcar_dir=None):  # TODO allow spaces in path names
497
    inp_file_lst = dos_inp.get('Screening', 'screen_inp_file').split()
498
    check_inp_files(inp_file_lst[0] if len(inp_file_lst) == 1 else inp_file_lst,
499
                    code, potcar_dir)
500
    return inp_file_lst[0] if len(inp_file_lst) == 1 else inp_file_lst
501

    
502

    
503
def get_surf_file():
504
    surf_file = dos_inp.get('Screening', 'surf_file')
505
    if not os.path.isfile(surf_file):
506
        logger.error(f'File {surf_file} not found.')
507
        raise FileNotFoundError(f'File {surf_file} not found')
508
    return surf_file
509

    
510

    
511
def get_sites():
512
    err_msg = 'The value of sites should be a list of atom numbers ' \
513
              '(ie. positive integers) or groups of atom numbers ' \
514
              'grouped by parentheses-like enclosers. \n' \
515
              'eg. 128,(135 138;141) 87 {45, 68}'
516
    # Convert the string into a list of lists
517
    sites = try_command(str2lst,
518
                        [(ValueError, err_msg), (AttributeError, err_msg)],
519
                        dos_inp.get('Screening', 'sites'))
520
    # Check all elements of the list (of lists) are positive integers
521
    for site in sites:
522
        if type(site) is list:
523
            for atom in site:
524
                if atom < 0:
525
                    logger.error(err_msg)
526
                    raise ValueError(err_msg)
527
        elif type(site) is int:
528
            if site < 0:
529
                logger.error(err_msg)
530
                raise ValueError(err_msg)
531
        else:
532
            logger.error(err_msg)
533
            raise ValueError(err_msg)
534

    
535
    return sites
536

    
537

    
538
def get_surf_ctrs2():
539
    err_msg = 'The value of surf_ctrs2 should be a list of atom numbers ' \
540
              '(ie. positive integers) or groups of atom numbers ' \
541
              'grouped by parentheses-like enclosers. \n' \
542
              'eg. 128,(135 138;141) 87 {45, 68}'
543
    # Convert the string into a list of lists
544
    surf_ctrs2 = try_command(str2lst,
545
                             [(ValueError, err_msg), (AttributeError, err_msg)],
546
                             dos_inp.get('Screening', 'surf_ctrs2'))
547
    # Check all elements of the list (of lists) are positive integers
548
    for ctr in surf_ctrs2:
549
        if type(ctr) is list:
550
            for atom in ctr:
551
                if atom < 0:
552
                    logger.error(err_msg)
553
                    raise ValueError(err_msg)
554
        elif type(ctr) is int:
555
            if ctr < 0:
556
                logger.error(err_msg)
557
                raise ValueError(err_msg)
558
        else:
559
            logger.error(err_msg)
560
            raise ValueError(err_msg)
561

    
562
    return surf_ctrs2
563

    
564

    
565
def get_molec_ctrs():
566
    err_msg = 'The value of molec_ctrs should be a list of atom' \
567
              ' numbers (ie. positive integers) or groups of atom ' \
568
              'numbers enclosed by parentheses-like characters. \n' \
569
              'eg. 128,(135 138;141) 87 {45, 68}'
570
    # Convert the string into a list of lists
571
    molec_ctrs = try_command(str2lst,
572
                             [(ValueError, err_msg),
573
                              (AttributeError, err_msg)],
574
                             dos_inp.get('Screening', 'molec_ctrs'))
575
    # Check all elements of the list (of lists) are positive integers
576
    for ctr in molec_ctrs:
577
        if isinstance(ctr, list):
578
            for atom in ctr:
579
                if atom < 0:
580
                    logger.error(err_msg)
581
                    raise ValueError(err_msg)
582
        elif isinstance(ctr, int):
583
            if ctr < 0:
584
                logger.error(err_msg)
585
                raise ValueError(err_msg)
586
        else:
587
            logger.error(err_msg)
588
            raise ValueError(err_msg)
589

    
590
    return molec_ctrs
591

    
592

    
593
def get_molec_ctrs2():
594
    err_msg = 'The value of molec_ctrs2 should be a list of atom ' \
595
              'numbers (ie. positive integers) or groups of atom ' \
596
              'numbers enclosed by parentheses-like characters. \n' \
597
              'eg. 128,(135 138;141) 87 {45, 68}'
598
    # Convert the string into a list of lists
599
    molec_ctrs2 = try_command(str2lst, [(ValueError, err_msg),
600
                                        (AttributeError, err_msg)],
601
                              dos_inp.get('Screening', 'molec_ctrs2'))
602

    
603
    # Check all elements of the list (of lists) are positive integers
604
    for ctr in molec_ctrs2:
605
        if isinstance(ctr, list):
606
            for atom in ctr:
607
                if atom < 0:
608
                    logger.error(err_msg)
609
                    raise ValueError(err_msg)
610
        elif isinstance(ctr, int):
611
            if ctr < 0:
612
                logger.error(err_msg)
613
                raise ValueError(err_msg)
614
        else:
615
            logger.error(err_msg)
616
            raise ValueError(err_msg)
617

    
618
    return molec_ctrs2
619

    
620

    
621
def get_molec_ctrs3():
622
    err_msg = 'The value of molec_ctrs3 should be a list of atom ' \
623
              'numbers (ie. positive integers) or groups of atom ' \
624
              'numbers enclosed by parentheses-like characters. \n' \
625
              'eg. 128,(135 138;141) 87 {45, 68}'
626
    # Convert the string into a list of lists
627
    molec_ctrs3 = try_command(str2lst, [(ValueError, err_msg),
628
                                        (AttributeError, err_msg)],
629
                              dos_inp.get('Screening', 'molec_ctrs3'))
630

    
631
    # Check all elements of the list (of lists) are positive integers
632
    for ctr in molec_ctrs3:
633
        if isinstance(ctr, list):
634
            for atom in ctr:
635
                if atom < 0:
636
                    logger.error(err_msg)
637
                    raise ValueError(err_msg)
638
        elif isinstance(ctr, int):
639
            if ctr < 0:
640
                logger.error(err_msg)
641
                raise ValueError(err_msg)
642
        else:
643
            logger.error(err_msg)
644
            raise ValueError(err_msg)
645

    
646
    return molec_ctrs3
647

    
648

    
649
def get_max_helic_angle():
650
    err_msg = "'max_helic_angle' must be a positive number in degrees"
651
    max_helic_angle = try_command(dos_inp.getfloat, [(ValueError, err_msg)],
652
                                  'Screening', 'max_helic_angle',
653
                                  fallback=180.0)
654
    if max_helic_angle < 0:
655
        logger.error(err_msg)
656
        raise ValueError(err_msg)
657

    
658
    return max_helic_angle
659

    
660

    
661
def get_select_magns():
662
    select_magns_vals = ['energy', 'moi']
663
    select_magns_str = dos_inp.get('Screening', 'select_magns',
664
                                   fallback='moi')
665
    select_magns_str.replace(',', ' ').replace(';', ' ')
666
    select_magns = select_magns_str.split(' ')
667
    select_magns = [m.lower() for m in select_magns]
668
    for m in select_magns:
669
        check_expect_val(m, select_magns_vals)
670
    return select_magns
671

    
672

    
673
def get_confs_per_magn():
674
    err_msg = num_error % ('confs_per_magn', 'positive integer')
675
    confs_per_magn = try_command(dos_inp.getint, [(ValueError, err_msg)],
676
                                 'Screening', 'confs_per_magn', fallback=2)
677
    if confs_per_magn <= 0:
678
        logger.error(err_msg)
679
        raise ValueError(err_msg)
680
    return confs_per_magn
681

    
682

    
683
def get_surf_norm_vect():
684
    err = "'surf_norm_vect' must be a 3 component vector, 'x', 'y' or 'z', " \
685
          "'auto' or 'asann'."
686
    cart_axes = {'x': [1.0, 0.0, 0.0], '-x': [-1.0, 0.0, 0.0],
687
                 'y': [0.0, 1.0, 0.0], '-y': [0.0, -1.0, 0.0],
688
                 'z': [0.0, 0.0, 1.0], '-z': [0.0, 0.0, -1.0]}
689
    surf_norm_vect_str = dos_inp.get('Screening', 'surf_norm_vect',
690
                                     fallback="auto").lower()
691
    if surf_norm_vect_str == "asann" or surf_norm_vect_str == "auto":
692
        return 'auto'
693
    if surf_norm_vect_str in cart_axes:
694
        return np.array(cart_axes[surf_norm_vect_str])
695
    surf_norm_vect = try_command(str2lst, [(ValueError, err)],
696
                                 surf_norm_vect_str, float)
697
    if len(surf_norm_vect) != 3:
698
        logger.error(err)
699
        raise ValueError(err)
700

    
701
    return np.array(surf_norm_vect) / np.linalg.norm(surf_norm_vect)
702

    
703

    
704
def get_adsorption_height():
705
    err_msg = num_error % ('adsorption_height', 'positive number')
706
    ads_height = try_command(dos_inp.getfloat, [(ValueError, err_msg)],
707
                             'Screening', 'adsorption_height', fallback=2.5)
708
    if ads_height <= 0:
709
        logger.error(err_msg)
710
        raise ValueError(err_msg)
711
    return ads_height
712

    
713

    
714
def get_set_angles():
715
    set_vals = ['euler', 'internal']
716
    check_expect_val(dos_inp.get('Screening', 'set_angles').lower(), set_vals)
717
    set_angles = dos_inp.get('Screening', 'set_angles',
718
                             fallback='euler').lower()
719
    return set_angles
720

    
721

    
722
def get_pts_per_angle():
723
    err_msg = num_error % ('sample_points_per_angle', 'positive integer')
724
    pts_per_angle = try_command(dos_inp.getint,
725
                                [(ValueError, err_msg)],
726
                                'Screening', 'sample_points_per_angle',
727
                                fallback=3)
728
    if pts_per_angle <= 0:
729
        logger.error(err_msg)
730
        raise ValueError(err_msg)
731
    return pts_per_angle
732

    
733

    
734
def get_max_structures():
735
    err_msg = num_error % ('max_structures', 'positive integer')
736
    max_structs = dos_inp.get('Screening', 'max_structures', fallback="False")
737
    if max_structs.lower() in turn_false_answers:
738
        return np.inf
739
    if try_command(int, [(ValueError, err_msg)], max_structs) <= 0:
740
        logger.error(err_msg)
741
        raise ValueError(err_msg)
742
    return int(max_structs)
743

    
744

    
745
def get_coll_thrsld():
746
    err_msg = num_error % ('collision_threshold', 'positive number')
747
    coll_thrsld_str = dos_inp.get('Screening', 'collision_threshold',
748
                                  fallback="False")
749
    if coll_thrsld_str.lower() in turn_false_answers:
750
        return False
751
    coll_thrsld = try_command(float, [(ValueError, err_msg)], coll_thrsld_str)
752

    
753
    if coll_thrsld <= 0:
754
        logger.error(err_msg)
755
        raise ValueError(err_msg)
756

    
757
    return coll_thrsld
758

    
759

    
760
def get_min_coll_height(norm_vect):
761
    err_msg = num_error % ('min_coll_height', 'decimal number')
762
    min_coll_height = dos_inp.get('Screening', 'min_coll_height',
763
                                  fallback="1.5")
764
    if min_coll_height.lower() in turn_false_answers:
765
        return False
766
    min_coll_height = try_command(float, [(ValueError, err_msg)],
767
                                  min_coll_height)
768
    cart_axes = [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0],
769
                 [-1.0, 0.0, 0.0], [0.0, -1.0, 0.0], [0.0, 0.0, -1.0]]
770
    err_msg = "'min_coll_height' option is only implemented for " \
771
              "'surf_norm_vect' to be one of the x, y or z axes. "
772
    if not isinstance(norm_vect, str) or norm_vect != 'auto':
773
        check_expect_val(norm_vect.tolist(), cart_axes, err_msg)
774
    return min_coll_height
775

    
776

    
777
def get_exclude_ads_ctr():
778
    err_msg = "exclude_ads_ctr must have a boolean value."
779
    exclude_ads_ctr = try_command(dos_inp.getboolean, [(ValueError, err_msg)],
780
                                  "Screening", "exclude_ads_ctr",
781
                                  fallback=False)
782
    return exclude_ads_ctr
783

    
784

    
785
def get_H_donor(spec_atoms):
786
    from ase.data import chemical_symbols
787
    err_msg = "The value of 'h_donor' must be either False, a chemical symbol " \
788
              "or an atom index"
789
    h_donor_str = dos_inp.get('Screening', 'h_donor', fallback="False")
790
    h_donor = []
791
    if h_donor_str.lower() in turn_false_answers:
792
        return False
793
    err = False
794
    for el in h_donor_str.split():
795
        try:
796
            h_donor.append(int(el))
797
        except ValueError:
798
            if el not in chemical_symbols + [nw_sym for pairs in spec_atoms
799
                                             for nw_sym in pairs]:
800
                err = True
801
            else:
802
                h_donor.append(el)
803
        finally:
804
            if err:
805
                logger.error(err_msg)
806
                ValueError(err_msg)
807
    return h_donor
808

    
809

    
810
def get_H_acceptor(spec_atoms):
811
    from ase.data import chemical_symbols
812
    err_msg = "The value of 'h_acceptor' must be either 'all', a chemical " \
813
              "symbol or an atom index."
814
    h_acceptor_str = dos_inp.get('Screening', 'h_acceptor', fallback="all")
815
    if h_acceptor_str.lower() == "all":
816
        return "all"
817
    h_acceptor = []
818
    err = False
819
    for el in h_acceptor_str.split():
820
        try:
821
            h_acceptor.append(int(el))
822
        except ValueError:
823
            if el not in chemical_symbols + [nw_sym for pairs in spec_atoms
824
                                             for nw_sym in pairs]:
825
                err = True
826
            else:
827
                h_acceptor.append(el)
828
        finally:
829
            if err:
830
                logger.error(err_msg)
831
                raise ValueError(err_msg)
832
    return h_acceptor
833

    
834

    
835
def get_use_molec_file():
836
    use_molec_file = dos_inp.get('Screening', 'use_molec_file',
837
                                 fallback='False')
838
    if use_molec_file.lower() in turn_false_answers:
839
        return False
840
    if not os.path.isfile(use_molec_file):
841
        logger.error(f'File {use_molec_file} not found.')
842
        raise FileNotFoundError(f'File {use_molec_file} not found')
843

    
844
    return use_molec_file
845

    
846

    
847
# Refinement
848

    
849
def get_refine_inp_file(code, potcar_dir=None):
850
    inp_file_lst = dos_inp.get('Refinement', 'refine_inp_file').split()
851
    check_inp_files(inp_file_lst[0] if len(inp_file_lst) == 1 else inp_file_lst,
852
                    code, potcar_dir)
853
    return inp_file_lst[0] if len(inp_file_lst) == 1 else inp_file_lst
854

    
855

    
856
def get_energy_cutoff():
857
    err_msg = num_error % ('energy_cutoff', 'positive decimal number')
858
    energy_cutoff = try_command(dos_inp.getfloat,
859
                                [(ValueError, err_msg)],
860
                                'Refinement', 'energy_cutoff', fallback=0.5)
861
    if energy_cutoff < 0:
862
        logger.error(err_msg)
863
        raise ValueError(err_msg)
864
    return energy_cutoff
865

    
866

    
867
# Read input parameters
868

    
869
def read_input(in_file):
870
    """Directs the reading of the parameters in the input file.
871

872
    @param in_file: The path to the DockOnSurf input file.
873
    @return inp_vars: Dictionary with the values for every option in the input
874
    file.
875
    """
876
    from modules.formats import adapt_format
877

    
878
    # Checks for errors in the Input file.
879
    err_msg = False
880
    try:
881
        dos_inp.read(in_file)
882
    except MissingSectionHeaderError as e:
883
        logger.error('There are options in the input file without a Section '
884
                     'header.')
885
        err_msg = e
886
    except DuplicateOptionError as e:
887
        logger.error('There is an option in the input file that has been '
888
                     'specified more than once.')
889
        err_msg = e
890
    except Exception as e:
891
        err_msg = e
892
    else:
893
        err_msg = False
894
    finally:
895
        if isinstance(err_msg, BaseException):
896
            raise err_msg
897

    
898
    inp_vars = {}
899

    
900
    # Global
901
    if not dos_inp.has_section('Global'):
902
        logger.error(no_sect_err % 'Global')
903
        raise NoSectionError('Global')
904

    
905
    # Mandatory options
906
    # Checks whether the mandatory options 'run_type', 'code', etc. are present.
907
    glob_mand_opts = ['run_type', 'code', 'batch_q_sys']
908
    for opt in glob_mand_opts:
909
        if not dos_inp.has_option('Global', opt):
910
            logger.error(no_opt_err % (opt, 'Global'))
911
            raise NoOptionError(opt, 'Global')
912

    
913
    # Mandatory options
914
    isolated, screening, refinement = get_run_type()
915
    inp_vars['isolated'] = isolated
916
    inp_vars['screening'] = screening
917
    inp_vars['refinement'] = refinement
918
    inp_vars['code'] = get_code()
919
    inp_vars['batch_q_sys'] = get_batch_q_sys()
920

    
921
    # Dependent options:
922
    if inp_vars['batch_q_sys']:
923
        inp_vars['max_jobs'] = get_max_jobs()
924
        if inp_vars['batch_q_sys'] != 'local':
925
            if not dos_inp.has_option('Global', 'subm_script'):
926
                logger.error(no_opt_err % ('subm_script', 'Global'))
927
                raise NoOptionError('subm_script', 'Global')
928
            inp_vars['subm_script'] = get_subm_script()
929
    if inp_vars['code'] == "vasp":
930
        inp_vars['potcar_dir'] = get_potcar_dir()
931

    
932
    # Facultative options (Default/Fallback value present)
933
    inp_vars['pbc_cell'] = get_pbc_cell()
934
    inp_vars['project_name'] = get_project_name()
935
    # inp_vars['relaunch_err'] = get_relaunch_err()
936
    inp_vars['special_atoms'] = get_special_atoms()
937

    
938
    # Isolated
939
    if isolated:
940
        if not dos_inp.has_section('Isolated'):
941
            logger.error(no_sect_err % 'Isolated')
942
            raise NoSectionError('Isolated')
943
        # Mandatory options
944
        iso_mand_opts = ['isol_inp_file', 'molec_file']
945
        for opt in iso_mand_opts:
946
            if not dos_inp.has_option('Isolated', opt):
947
                logger.error(no_opt_err % (opt, 'Isolated'))
948
                raise NoOptionError(opt, 'Isolated')
949
        if 'potcar_dir' in inp_vars:
950
            inp_vars['isol_inp_file'] = get_isol_inp_file(inp_vars['code'],
951
                                                          inp_vars[
952
                                                              'potcar_dir'])
953
        else:
954
            inp_vars['isol_inp_file'] = get_isol_inp_file(inp_vars['code'])
955
        inp_vars['molec_file'] = get_molec_file()
956

    
957
        # Checks for PBC
958
        atms = adapt_format('ase', inp_vars['molec_file'],
959
                            inp_vars['special_atoms'])
960
        if inp_vars['code'] == 'vasp' and np.linalg.det(atms.cell) == 0.0 \
961
                and inp_vars['pbc_cell'] is False:
962
            err_msg = "When running calculations with 'VASP', the PBC cell " \
963
                      "should be provided either implicitely inside " \
964
                      "'molec_file' or by setting the 'pbc_cell' option."
965
            logger.error(err_msg)
966
            raise ValueError(err_msg)
967
        elif inp_vars['pbc_cell'] is False and np.linalg.det(atms.cell) != 0.0:
968
            inp_vars['pbc_cell'] = atms.cell
969
            logger.info(f"Obtained pbc_cell from '{inp_vars['molec_file']}' "
970
                        f"file.")
971
        elif (atms.cell != 0).any() and not np.allclose(inp_vars['pbc_cell'],
972
                                                        atms.cell):
973
            logger.warning("'molec_file' has an implicit cell defined "
974
                           f"different than 'pbc_cell'.\n"
975
                           f"'molec_file' = {atms.cell}.\n"
976
                           f"'pbc_cell' = {inp_vars['pbc_cell']}).\n"
977
                           "'pbc_cell' value will be used.")
978

    
979
        # Facultative options (Default/Fallback value present)
980
        inp_vars['num_conformers'] = get_num_conformers()
981
        inp_vars['pre_opt'] = get_pre_opt()
982

    
983
    # Screening
984
    if screening:
985
        if not dos_inp.has_section('Screening'):
986
            logger.error(no_sect_err % 'Screening')
987
            raise NoSectionError('Screening')
988
        # Mandatory options:
989
        # Checks whether the mandatory options are present.
990
        # Mandatory options
991
        screen_mand_opts = ['screen_inp_file', 'surf_file', 'sites',
992
                            'molec_ctrs']
993
        for opt in screen_mand_opts:
994
            if not dos_inp.has_option('Screening', opt):
995
                logger.error(no_opt_err % (opt, 'Screening'))
996
                raise NoOptionError(opt, 'Screening')
997
        if 'potcar_dir' in inp_vars:
998
            inp_vars['screen_inp_file'] = get_screen_inp_file(inp_vars['code'],
999
                                                              inp_vars[
1000
                                                                  'potcar_dir'])
1001
        else:
1002
            inp_vars['screen_inp_file'] = get_screen_inp_file(inp_vars['code'])
1003
        inp_vars['surf_file'] = get_surf_file()
1004
        inp_vars['sites'] = get_sites()
1005
        inp_vars['molec_ctrs'] = get_molec_ctrs()
1006

    
1007
        # Checks for PBC
1008
        atms = adapt_format('ase', inp_vars['surf_file'],
1009
                            inp_vars['special_atoms'])
1010
        if inp_vars['code'] == 'vasp' and np.linalg.det(atms.cell) == 0.0 \
1011
                and inp_vars['pbc_cell'] is False:
1012
            err_msg = "When running calculations with 'VASP', the PBC cell " \
1013
                      "should be provided either implicitely inside " \
1014
                      "'surf_file' or by setting the 'pbc_cell' option."
1015
            logger.error(err_msg)
1016
            raise ValueError(err_msg)
1017
        elif inp_vars['pbc_cell'] is False and np.linalg.det(atms.cell) != 0.0:
1018
            inp_vars['pbc_cell'] = atms.cell
1019
            logger.info(f"Obtained pbc_cell from '{inp_vars['surf_file']}' "
1020
                        f"file.")
1021
        elif (atms.cell != 0).any() and not np.allclose(inp_vars['pbc_cell'],
1022
                                                        atms.cell):
1023
            logger.warning("'surf_file' has an implicit cell defined "
1024
                           f"different than 'pbc_cell'.\n"
1025
                           f"'surf_file' = {atms.cell}.\n"
1026
                           f"'pbc_cell' = {inp_vars['pbc_cell']}).\n"
1027
                           "'pbc_cell' value will be used.")
1028

    
1029
        # Facultative options (Default value present)
1030
        inp_vars['select_magns'] = get_select_magns()
1031
        inp_vars['confs_per_magn'] = get_confs_per_magn()
1032
        inp_vars['surf_norm_vect'] = get_surf_norm_vect()
1033
        inp_vars['adsorption_height'] = get_adsorption_height()
1034
        inp_vars['set_angles'] = get_set_angles()
1035
        inp_vars['sample_points_per_angle'] = get_pts_per_angle()
1036
        inp_vars['collision_threshold'] = get_coll_thrsld()
1037
        inp_vars['min_coll_height'] = get_min_coll_height(
1038
            inp_vars['surf_norm_vect'])
1039
        if inp_vars['min_coll_height'] is False \
1040
                and inp_vars['collision_threshold'] is False:
1041
            logger.warning("Collisions are deactivated: Overlapping of "
1042
                           "adsorbate and surface is possible")
1043
        inp_vars['exclude_ads_ctr'] = get_exclude_ads_ctr()
1044
        inp_vars['h_donor'] = get_H_donor(inp_vars['special_atoms'])
1045
        inp_vars['max_structures'] = get_max_structures()
1046
        inp_vars['use_molec_file'] = get_use_molec_file()
1047

    
1048
        # Options depending on the value of others
1049
        if inp_vars['set_angles'] == "internal":
1050
            internal_opts = ['molec_ctrs2', 'molec_ctrs3', 'surf_ctrs2',
1051
                             'max_helic_angle']
1052
            for opt in internal_opts:
1053
                if not dos_inp.has_option('Screening', opt):
1054
                    logger.error(no_opt_err % (opt, 'Screening'))
1055
                    raise NoOptionError(opt, 'Screening')
1056
            inp_vars['max_helic_angle'] = get_max_helic_angle()
1057
            inp_vars['molec_ctrs2'] = get_molec_ctrs2()
1058
            inp_vars['molec_ctrs3'] = get_molec_ctrs3()
1059
            inp_vars['surf_ctrs2'] = get_surf_ctrs2()
1060
            if len(inp_vars["molec_ctrs2"]) != len(inp_vars['molec_ctrs']) \
1061
                    or len(inp_vars["molec_ctrs3"]) != \
1062
                    len(inp_vars['molec_ctrs']) \
1063
                    or len(inp_vars['surf_ctrs2']) != len(inp_vars['sites']):
1064
                err_msg = "'molec_ctrs' 'molec_ctrs2' and 'molec_ctrs3' must " \
1065
                          "have the same number of indices "
1066
                logger.error(err_msg)
1067
                raise ValueError(err_msg)
1068

    
1069
        if inp_vars['h_donor'] is False:
1070
            inp_vars['h_acceptor'] = False
1071
        else:
1072
            inp_vars['h_acceptor'] = get_H_acceptor(inp_vars['special_atoms'])
1073

    
1074
    # Refinement
1075
    if refinement:
1076
        if not dos_inp.has_section('Refinement'):
1077
            logger.error(no_sect_err % 'Refinement')
1078
            raise NoSectionError('Refinement')
1079
        # Mandatory options
1080
        # Checks whether the mandatory options are present.
1081
        ref_mand_opts = ['refine_inp_file']
1082
        for opt in ref_mand_opts:
1083
            if not dos_inp.has_option('Refinement', opt):
1084
                logger.error(no_opt_err % (opt, 'Refinement'))
1085
                raise NoOptionError(opt, 'Refinement')
1086
        if 'potcar_dir' in inp_vars:
1087
            inp_vars['refine_inp_file'] = get_refine_inp_file(inp_vars['code'],
1088
                                                              inp_vars[
1089
                                                                  'potcar_dir'])
1090
        else:
1091
            inp_vars['refine_inp_file'] = get_refine_inp_file(inp_vars['code'])
1092

    
1093
        # Facultative options (Default value present)
1094
        inp_vars['energy_cutoff'] = get_energy_cutoff()
1095
        # end energy_cutoff
1096

    
1097
    return_vars_str = "\n\t".join([str(key) + ": " + str(value)
1098
                                   for key, value in inp_vars.items()])
1099
    logger.info(f'Correctly read {in_file} parameters:'
1100
                f' \n\n\t{return_vars_str}\n')
1101

    
1102
    return inp_vars
1103

    
1104

    
1105
if __name__ == "__main__":
1106
    import sys
1107

    
1108
    print(read_input(sys.argv[1]))