Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / dos_input.py @ 7d37379d

Historique | Voir | Annoter | Télécharger (41,33 ko)

1
"""Functions to deal with DockOnSurf input files.
2

3
Functions
4
try_command:Tries to run a command and logs its exceptions (expected and not).
5
str2lst: Converts a string of integers, and groups of them, to a list of lists.
6
check_expect_val: Checks whether the value of an option has an adequate value.
7
read_input: Sets up the calculation by reading the parameters from input file.
8
get_run_type: Gets 'run_type' value and checks that its value is acceptable.
9
get_code: Gets 'code' value and checks that its value is acceptable.
10
get_batch_q_sys: Gets 'batch_q_sys' value and checks that its value is
11
acceptable.
12
get_relaunch_err: Gets 'relaunch_err' value and checks that its value is
13
acceptable.
14
get_max_jobs: Gets 'max_jobs' value and checks that its value is acceptable.
15
get_special_atoms: Gets 'special_atoms' value and checks that its value is
16
acceptable.
17
get_isol_inp_file: Gets 'isol_inp_file' value and checks that its value is
18
acceptable.
19
get_cluster_magns: Gets 'cluster_magns' value and checks that its value is
20
acceptable.
21
get_num_conformers: Gets 'num_conformers' value and checks that its value is
22
acceptable.
23
get_num_prom_cand: Gets 'num_prom_cand' value and checks that its value is
24
acceptable.
25
get_iso_rmsd: Gets 'iso_rmsd' value and checks that its value is acceptable.
26
get_pre_opt: Gets 'pre_opt' value and checks that its value is acceptable.
27
get_screen_inp_file: Gets 'screen_inp_file' value and checks that its value is
28
acceptable.
29
get_sites: Gets 'sites' value and checks that its value is acceptable.
30
get_molec_ctrs: Gets 'molec_ctrs' value and checks that its value is
31
acceptable.
32
get_try_disso: Gets 'try_disso' value and checks that its value is acceptable.
33
get_pts_per_angle: Gets 'pts_per_angle' value and checks that its value is
34
acceptable.
35
get_coll_thrsld: Gets 'coll_thrsld' value and checks that its value is
36
acceptable.
37
get_screen_rmsd: Gets 'screen_rmsd' value and checks that its value is
38
acceptable.
39
get_coll_bottom_z: Gets 'coll_bottom_z' value and checks that its value is
40
acceptable.
41
get_refine_inp_file: Gets 'refine_inp_file' value and checks that its value is
42
acceptable.
43
get_energy_cutoff: Gets 'energy_cutoff' value and checks that its value is
44
acceptable.
45
"""
46
import os.path
47
import logging
48
from configparser import ConfigParser, NoSectionError, NoOptionError, \
49
    MissingSectionHeaderError, DuplicateOptionError
50
import numpy as np
51
from modules.utilities import try_command
52

    
53
logger = logging.getLogger('DockOnSurf')
54

    
55
dos_inp = ConfigParser(inline_comment_prefixes='#',
56
                       empty_lines_in_values=False)
57

    
58
new_answers = {'n': False, 'none': False, 'nay': False,
59
               'y': True, '': True, 'aye': True, 'sure': True}
60
for answer, val in new_answers.items():
61
    dos_inp.BOOLEAN_STATES[answer] = val  # TODO Check value 0
62
turn_false_answers = [answer for answer in dos_inp.BOOLEAN_STATES
63
                      if dos_inp.BOOLEAN_STATES[answer] is False]
64
turn_true_answers = [answer for answer in dos_inp.BOOLEAN_STATES
65
                     if dos_inp.BOOLEAN_STATES[answer]]
66

    
67
no_sect_err = "Section '%s' not found on input file"
68
no_opt_err = "Option '%s' not found on section '%s'"
69
num_error = "'%s' value must be a %s"
70

    
71

    
72
# Auxilary functions
73

    
74
def str2lst(cmplx_str, func=int):  # TODO: enable deeper level of nested lists
75
    # TODO Treat all-enclosing parenthesis as a list instead of list of lists.
76
    """Converts a string of integers, and groups of them, to a list.
77

78
    Keyword arguments:
79
    @param cmplx_str: str, string of integers (or floats) and groups of them
80
    enclosed by parentheses-like characters.
81
    - Group enclosers: '()' '[]' and '{}'.
82
    - Separators: ',' ';' and ' '.
83
    - Nested groups are not allowed: '3 ((6 7) 8) 4'.
84
    @param func: either to use int or float
85

86
    @return list, list of integers (or floats), or list of integers (or floats)
87
    in the case they were grouped. First, the singlets are placed, and then the
88
    groups in input order.
89

90
    eg. '128,(135 138;141] 87 {45, 68}' -> [128, 87, [135, 138, 141], [45, 68]]
91
    """
92

    
93
    # Checks
94
    error_msg = "Function argument should be a str, sequence of " \
95
                "numbers separated by ',' ';' or ' '." \
96
                "\nThey can be grouped in parentheses-like enclosers: '()', " \
97
                "'[]' or {}. Nested groups are not allowed. \n" \
98
                "eg. 128,(135 138;141) 87 {45, 68}"
99
    cmplx_str = try_command(cmplx_str.replace, [(AttributeError, error_msg)],
100
                            ',', ' ')
101

    
102
    cmplx_str = cmplx_str.replace(';', ' ').replace('[', '(').replace(
103
        ']', ')').replace('{', '(').replace('}', ')')
104

    
105
    try_command(list, [(ValueError, error_msg)], map(func, cmplx_str.replace(
106
        ')', '').replace('(', '').split()))
107

    
108
    deepness = 0
109
    for el in cmplx_str.split():
110
        if '(' in el:
111
            deepness += 1
112
        if ')' in el:
113
            deepness += -1
114
        if deepness > 1 or deepness < 0:
115
            logger.error(error_msg)
116
            raise ValueError(error_msg)
117

    
118
    init_list = cmplx_str.split()
119
    start_group = []
120
    end_group = []
121
    for i, element in enumerate(init_list):
122
        if '(' in element:
123
            start_group.append(i)
124
            init_list[i] = element.replace('(', '')
125
        if ')' in element:
126
            end_group.append(i)
127
            init_list[i] = element.replace(')', '')
128

    
129
    init_list = list(map(func, init_list))
130

    
131
    new_list = []
132
    for start_el, end_el in zip(start_group, end_group):
133
        new_list.append(init_list[start_el:end_el + 1])
134

    
135
    for v in new_list:
136
        for el in v:
137
            init_list.remove(el)
138
    return init_list + new_list
139

    
140

    
141
def check_expect_val(value, expect_vals, err_msg=None):
142
    """Checks whether an option lies within its expected values.
143

144
    Keyword arguments:
145
    @param value: The variable to check if its value lies within the expected
146
    ones
147
    @param expect_vals: list, list of values allowed for the present option.
148
    @param err_msg: The error message to be prompted in both log and screen.
149
    @raise ValueError: if the value is not among the expected ones.
150
    @return True if the value is among the expected ones.
151
    """
152
    if err_msg is None:
153
        err_msg = f"'{value}' is not an adequate value.\n" \
154
                  f"Adequate values: {expect_vals}"
155
    if not any([exp_val == value for exp_val in expect_vals]):
156
        logger.error(err_msg)
157
        raise ValueError(err_msg)
158

    
159
    return True
160

    
161

    
162
def check_inp_files(inp_files, code, potcar_dir=None):
163
    if code == 'cp2k':
164
        from pycp2k import CP2K
165
        if not isinstance(inp_files, str):
166
            err_msg = "When using CP2K, only one input file is allowed"
167
            logger.error(err_msg)
168
            ValueError(err_msg)
169
        elif not os.path.isfile(inp_files):
170
            err_msg = f"Input file {inp_files} was not found."
171
            logger.error(err_msg)
172
            raise FileNotFoundError(err_msg)
173
        cp2k = CP2K()
174
        try_command(cp2k.parse,
175
                    [(UnboundLocalError, "Invalid CP2K input file")], inp_files)
176
    elif code == "vasp":
177
        if not potcar_dir:
178
            mand_files = ["INCAR", "KPOINTS", "POTCAR"]
179
        else:
180
            mand_files = ["INCAR", "KPOINTS"]
181
            if any("POTCAR" in inp_file for inp_file in inp_files):
182
                logger.warning("A POTCAR file was specified as input file "
183
                               "while the automatic generation of POTCAR was "
184
                               "also enabled via the 'potcar_dir' keyword. The "
185
                               "POTCAR specified as input_file will be used "
186
                               "instead of the auto-generated one.")
187
        # Check that it inp_files is a list of file paths
188
        if not isinstance(inp_files, list) and all(isinstance(inp_file, str)
189
                                                   for inp_file in inp_files):
190
            err_msg = "'inp_files' should be a list of file names/paths"
191
            logger.error(err_msg)
192
            raise ValueError(err_msg)
193
        # Check that all mandatory files are defined once and just once.
194
        elif [[mand_file in inp_file for inp_file in inp_files].count(True)
195
              for mand_file in mand_files].count(1) != len(mand_files):
196
            err_msg = f"Each of the mandatory files {mand_files} must be " \
197
                      f"defined once and just once."
198
            logger.error(err_msg)
199
            raise FileNotFoundError(err_msg)
200
        # Check that the defined files exist
201
        elif any(not os.path.isfile(inp_file) for inp_file in inp_files):
202
            err_msg = f"At least one of the mandatory files {mand_files} was " \
203
                      "not found."
204
            logger.error(err_msg)
205
            raise FileNotFoundError(err_msg)
206
        # Check that mandatory files are actual vasp files.
207
        else:
208
            from pymatgen.io.vasp.inputs import Incar, Kpoints, Potcar
209
            for inp_file in inp_files:
210
                file_name = inp_file.split("/")[-1]
211
                if not any(mand_file in file_name for mand_file in mand_files):
212
                    continue
213
                file_type = ""
214
                for mand_file in mand_files:
215
                    if mand_file in inp_file:
216
                        file_type = mand_file
217
                err = False
218
                err_msg = f"'{inp_file}' is not a valid {file_name} file."
219
                try:
220
                    eval(file_type.capitalize()).from_file(inp_file)
221
                except ValueError:
222
                    logger.error(err_msg)
223
                    err = ValueError(err_msg)
224
                except IndexError:
225
                    logger.error(err_msg)
226
                    err = IndexError(err_msg)
227
                else:
228
                    if file_name == "INCAR":
229
                        Incar.from_file("INCAR").check_params()
230
                finally:
231
                    if isinstance(err, BaseException):
232
                        raise err
233

    
234

    
235
# Global
236

    
237
def get_run_type():
238
    isolated, screening, refinement = (False, False, False)
239
    run_type_vals = ['isolated', 'screening', 'refinement', 'adsorption',
240
                     'full']
241
    run_types = dos_inp.get('Global', 'run_type').split()
242
    for run_type in run_types:
243
        check_expect_val(run_type.lower(), run_type_vals)
244
        if 'isol' in run_type.lower():
245
            isolated = True
246
        if 'screen' in run_type.lower():
247
            screening = True
248
        if 'refine' in run_type.lower():
249
            refinement = True
250
        if 'adsor' in run_type.lower():
251
            screening, refinement = (True, True)
252
        if 'full' in run_type.lower():
253
            isolated, screening, refinement = (True, True, True)
254

    
255
    return isolated, screening, refinement
256

    
257

    
258
def get_code():
259
    code_vals = ['cp2k', 'vasp']
260
    check_expect_val(dos_inp.get('Global', 'code').lower(), code_vals)
261
    code = dos_inp.get('Global', 'code').lower()
262
    return code
263

    
264

    
265
def get_batch_q_sys():
266
    batch_q_sys_vals = ['sge', 'lsf', 'irene', 'local'] + turn_false_answers
267
    check_expect_val(dos_inp.get('Global', 'batch_q_sys').lower(),
268
                     batch_q_sys_vals)
269
    batch_q_sys = dos_inp.get('Global', 'batch_q_sys').lower()
270
    if batch_q_sys.lower() in turn_false_answers:
271
        return False
272
    else:
273
        return batch_q_sys
274

    
275

    
276
def get_pbc_cell():
277
    from ase.atoms import Cell
278
    err_msg = "'pbc_cell' must be either 3 vectors of size 3 or False."
279
    pbc_cell_str = dos_inp.get('Global', 'pbc_cell', fallback="False")
280
    if pbc_cell_str.lower() in turn_false_answers:
281
        return False
282
    else:
283
        pbc_cell = np.array(try_command(str2lst, [(ValueError, err_msg)],
284
                                        pbc_cell_str, float))
285
        if pbc_cell.shape != (3, 3):
286
            logger.error(err_msg)
287
            raise ValueError(err_msg)
288
        if np.linalg.det(pbc_cell) == 0.0:
289
            err_msg = "The volume of the defined cell is 0"
290
            logger.error(err_msg)
291
            raise ValueError(err_msg)
292
        return Cell(pbc_cell)
293

    
294

    
295
def get_subm_script():
296
    subm_script = dos_inp.get('Global', 'subm_script')
297
    if not os.path.isfile(subm_script):
298
        logger.error(f'File {subm_script} not found.')
299
        raise FileNotFoundError(f'File {subm_script} not found')
300
    return subm_script
301

    
302

    
303
def get_project_name():
304
    project_name = dos_inp.get('Global', 'project_name', fallback='')
305
    return project_name
306

    
307

    
308
def get_relaunch_err():
309
    relaunch_err_vals = ['geo_not_conv']
310
    relaunch_err = dos_inp.get('Global', 'relaunch_err',
311
                               fallback="False")
312
    if relaunch_err.lower() in turn_false_answers:
313
        return False
314
    else:
315
        check_expect_val(relaunch_err.lower(), relaunch_err_vals)
316
    return relaunch_err
317

    
318

    
319
def get_max_jobs():
320
    import re
321
    err_msg = "'max_jobs' must be a list of, number plus 'p', 'q' or 'r', or " \
322
              "a combination of them without repeating letters.\n" \
323
              "eg: '2r 3p 4pr', '5q' or '3r 3p'"
324
    max_jobs_str = dos_inp.get('Global', 'max_jobs', fallback="inf").lower()
325
    str_vals = ["r", "p", "q", "rp", "rq", "pr", "qr"]
326
    max_jobs = {"r": np.inf, "p": np.inf, "rp": np.inf}
327
    if "inf" == max_jobs_str:
328
        return {"r": np.inf, "p": np.inf, "rp": np.inf}
329
    # Iterate over the number of requirements:
330
    for req in max_jobs_str.split():
331
        # Split numbers from letters into a list
332
        req_parts = re.findall(r'[a-z]+|\d+', req)
333
        if len(req_parts) != 2:
334
            logger.error(err_msg)
335
            raise ValueError(err_msg)
336
        if req_parts[0].isdecimal():
337
            req_parts[1] = req_parts[1].replace('q', 'p').replace('pr', 'rp')
338
            if req_parts[1] in str_vals and max_jobs[req_parts[1]] == np.inf:
339
                max_jobs[req_parts[1]] = int(req_parts[0])
340
        elif req_parts[1].isdecimal():
341
            req_parts[0] = req_parts[0].replace('q', 'p').replace('pr', 'rp')
342
            if req_parts[0] in str_vals and max_jobs[req_parts[0]] == np.inf:
343
                max_jobs[req_parts[0]] = int(req_parts[1])
344
        else:
345
            logger.error(err_msg)
346
            raise ValueError(err_msg)
347

    
348
    return max_jobs
349

    
350

    
351
def get_special_atoms():
352
    from ase.data import chemical_symbols
353

    
354
    spec_at_err = '\'special_atoms\' does not have an adequate format.\n' \
355
                  'Adequate format: (Fe1 Fe) (O1 O)'
356
    special_atoms = dos_inp.get('Global', 'special_atoms', fallback="False")
357
    if special_atoms.lower() in turn_false_answers:
358
        special_atoms = []
359
    else:
360
        # Converts the string into a list of tuples
361
        lst_tple = [tuple(pair.replace("(", "").split()) for pair in
362
                    special_atoms.split(")")[:-1]]
363
        if len(lst_tple) == 0:
364
            logger.error(spec_at_err)
365
            raise ValueError(spec_at_err)
366
        for i, tup in enumerate(lst_tple):
367
            if not isinstance(tup, tuple) or len(tup) != 2:
368
                logger.error(spec_at_err)
369
                raise ValueError(spec_at_err)
370
            if tup[1].capitalize() not in chemical_symbols:
371
                elem_err = "The second element of the couple should be an " \
372
                           "actual element of the periodic table"
373
                logger.error(elem_err)
374
                raise ValueError(elem_err)
375
            if tup[0].capitalize() in chemical_symbols:
376
                elem_err = "The first element of the couple is already an " \
377
                           "actual element of the periodic table, "
378
                logger.error(elem_err)
379
                raise ValueError(elem_err)
380
            for j, tup2 in enumerate(lst_tple):
381
                if j <= i:
382
                    continue
383
                if tup2[0] == tup[0]:
384
                    label_err = f'You have specified the label {tup[0]} to ' \
385
                                f'more than one special atom'
386
                    logger.error(label_err)
387
                    raise ValueError(label_err)
388
        special_atoms = lst_tple
389
    return special_atoms
390

    
391

    
392
def get_potcar_dir():
393
    potcar_dir = dos_inp.get('Global', 'potcar_dir', fallback="False")
394
    if potcar_dir.lower() in turn_false_answers:
395
        return False
396
    elif not os.path.isdir(potcar_dir):
397
        err_msg = "'potcar_dir' must be either False or a directory."
398
        logger.error(err_msg)
399
        raise ValueError(err_msg)
400
    else:
401
        return potcar_dir
402

    
403

    
404
# Isolated
405

    
406
def get_isol_inp_file(code, potcar_dir=None):  # TODO allow spaces in path names
407
    inp_file_lst = dos_inp.get('Isolated', 'isol_inp_file').split()
408
    check_inp_files(inp_file_lst[0] if len(inp_file_lst) == 1 else inp_file_lst,
409
                    code, potcar_dir)
410
    return inp_file_lst[0] if len(inp_file_lst) == 1 else inp_file_lst
411

    
412

    
413
def get_molec_file():
414
    molec_file = dos_inp.get('Isolated', 'molec_file')
415
    if not os.path.isfile(molec_file):
416
        logger.error(f'File {molec_file} not found.')
417
        raise FileNotFoundError(f'File {molec_file} not found')
418
    return molec_file
419

    
420

    
421
def get_num_conformers():
422
    err_msg = num_error % ('num_conformers', 'positive integer')
423
    num_conformers = try_command(dos_inp.getint, [(ValueError, err_msg)],
424
                                 'Isolated', 'num_conformers', fallback=100)
425
    if num_conformers < 1:
426
        logger.error(err_msg)
427
        raise ValueError(err_msg)
428
    return num_conformers
429

    
430

    
431
def get_pre_opt():
432
    pre_opt_vals = ['uff', 'mmff'] + turn_false_answers
433
    check_expect_val(dos_inp.get('Isolated', 'pre_opt').lower(), pre_opt_vals)
434
    pre_opt = dos_inp.get('Isolated', 'pre_opt').lower()
435
    if pre_opt in turn_false_answers:
436
        return False
437
    else:
438
        return pre_opt
439

    
440

    
441
# Screening
442

    
443
def get_screen_inp_file(code,
444
                        potcar_dir=None):  # TODO allow spaces in path names
445
    inp_file_lst = dos_inp.get('Screening', 'screen_inp_file').split()
446
    check_inp_files(inp_file_lst[0] if len(inp_file_lst) == 1 else inp_file_lst,
447
                    code, potcar_dir)
448
    return inp_file_lst[0] if len(inp_file_lst) == 1 else inp_file_lst
449

    
450

    
451
def get_surf_file():
452
    surf_file = dos_inp.get('Screening', 'surf_file')
453
    if not os.path.isfile(surf_file):
454
        logger.error(f'File {surf_file} not found.')
455
        raise FileNotFoundError(f'File {surf_file} not found')
456
    return surf_file
457

    
458

    
459
def get_sites():
460
    err_msg = 'The value of sites should be a list of atom numbers ' \
461
              '(ie. positive integers) or groups of atom numbers ' \
462
              'grouped by parentheses-like enclosers. \n' \
463
              'eg. 128,(135 138;141) 87 {45, 68}'
464
    # Convert the string into a list of lists
465
    sites = try_command(str2lst,
466
                        [(ValueError, err_msg), (AttributeError, err_msg)],
467
                        dos_inp.get('Screening', 'sites'))
468
    # Check all elements of the list (of lists) are positive integers
469
    for site in sites:
470
        if type(site) is list:
471
            for atom in site:
472
                if atom < 0:
473
                    logger.error(err_msg)
474
                    raise ValueError(err_msg)
475
        elif type(site) is int:
476
            if site < 0:
477
                logger.error(err_msg)
478
                raise ValueError(err_msg)
479
        else:
480
            logger.error(err_msg)
481
            raise ValueError(err_msg)
482

    
483
    return sites
484

    
485

    
486
def get_surf_ctrs2():
487
    err_msg = 'The value of surf_ctrs2 should be a list of atom numbers ' \
488
              '(ie. positive integers) or groups of atom numbers ' \
489
              'grouped by parentheses-like enclosers. \n' \
490
              'eg. 128,(135 138;141) 87 {45, 68}'
491
    # Convert the string into a list of lists
492
    surf_ctrs2 = try_command(str2lst,
493
                             [(ValueError, err_msg), (AttributeError, err_msg)],
494
                             dos_inp.get('Screening', 'surf_ctrs2'))
495
    # Check all elements of the list (of lists) are positive integers
496
    for ctr in surf_ctrs2:
497
        if type(ctr) is list:
498
            for atom in ctr:
499
                if atom < 0:
500
                    logger.error(err_msg)
501
                    raise ValueError(err_msg)
502
        elif type(ctr) is int:
503
            if ctr < 0:
504
                logger.error(err_msg)
505
                raise ValueError(err_msg)
506
        else:
507
            logger.error(err_msg)
508
            raise ValueError(err_msg)
509

    
510
    return surf_ctrs2
511

    
512

    
513
def get_molec_ctrs():
514
    err_msg = 'The value of molec_ctrs should be a list of atom' \
515
              ' numbers (ie. positive integers) or groups of atom ' \
516
              'numbers enclosed by parentheses-like characters. \n' \
517
              'eg. 128,(135 138;141) 87 {45, 68}'
518
    # Convert the string into a list of lists
519
    molec_ctrs = try_command(str2lst,
520
                             [(ValueError, err_msg),
521
                              (AttributeError, err_msg)],
522
                             dos_inp.get('Screening', 'molec_ctrs'))
523
    # Check all elements of the list (of lists) are positive integers
524
    for ctr in molec_ctrs:
525
        if isinstance(ctr, list):
526
            for atom in ctr:
527
                if atom < 0:
528
                    logger.error(err_msg)
529
                    raise ValueError(err_msg)
530
        elif isinstance(ctr, int):
531
            if ctr < 0:
532
                logger.error(err_msg)
533
                raise ValueError(err_msg)
534
        else:
535
            logger.error(err_msg)
536
            raise ValueError(err_msg)
537

    
538
    return molec_ctrs
539

    
540

    
541
def get_molec_ctrs2():
542
    err_msg = 'The value of molec_ctrs2 should be a list of atom ' \
543
              'numbers (ie. positive integers) or groups of atom ' \
544
              'numbers enclosed by parentheses-like characters. \n' \
545
              'eg. 128,(135 138;141) 87 {45, 68}'
546
    # Convert the string into a list of lists
547
    molec_ctrs2 = try_command(str2lst, [(ValueError, err_msg),
548
                                        (AttributeError, err_msg)],
549
                              dos_inp.get('Screening', 'molec_ctrs2'))
550

    
551
    # Check all elements of the list (of lists) are positive integers
552
    for ctr in molec_ctrs2:
553
        if isinstance(ctr, list):
554
            for atom in ctr:
555
                if atom < 0:
556
                    logger.error(err_msg)
557
                    raise ValueError(err_msg)
558
        elif isinstance(ctr, int):
559
            if ctr < 0:
560
                logger.error(err_msg)
561
                raise ValueError(err_msg)
562
        else:
563
            logger.error(err_msg)
564
            raise ValueError(err_msg)
565

    
566
    return molec_ctrs2
567

    
568

    
569
def get_molec_ctrs3():
570
    err_msg = 'The value of molec_ctrs3 should be a list of atom ' \
571
              'numbers (ie. positive integers) or groups of atom ' \
572
              'numbers enclosed by parentheses-like characters. \n' \
573
              'eg. 128,(135 138;141) 87 {45, 68}'
574
    # Convert the string into a list of lists
575
    molec_ctrs3 = try_command(str2lst, [(ValueError, err_msg),
576
                                        (AttributeError, err_msg)],
577
                              dos_inp.get('Screening', 'molec_ctrs3'))
578

    
579
    # Check all elements of the list (of lists) are positive integers
580
    for ctr in molec_ctrs3:
581
        if isinstance(ctr, list):
582
            for atom in ctr:
583
                if atom < 0:
584
                    logger.error(err_msg)
585
                    raise ValueError(err_msg)
586
        elif isinstance(ctr, int):
587
            if ctr < 0:
588
                logger.error(err_msg)
589
                raise ValueError(err_msg)
590
        else:
591
            logger.error(err_msg)
592
            raise ValueError(err_msg)
593

    
594
    return molec_ctrs3
595

    
596

    
597
def get_max_helic_angle():
598
    err_msg = "'max_helic_angle' must be a positive number in degrees"
599
    max_helic_angle = try_command(dos_inp.getfloat, [(ValueError, err_msg)],
600
                                  'Screening', 'max_helic_angle',
601
                                  fallback=180.0)
602
    if max_helic_angle < 0:
603
        logger.error(err_msg)
604
        raise ValueError(err_msg)
605

    
606
    return max_helic_angle
607

    
608

    
609
def get_select_magns():
610
    select_magns_vals = ['energy', 'moi']
611
    select_magns_str = dos_inp.get('Screening', 'select_magns',
612
                                   fallback='moi')
613
    select_magns_str.replace(',', ' ').replace(';', ' ')
614
    select_magns = select_magns_str.split(' ')
615
    select_magns = [m.lower() for m in select_magns]
616
    for m in select_magns:
617
        check_expect_val(m, select_magns_vals)
618
    return select_magns
619

    
620

    
621
def get_confs_per_magn():
622
    err_msg = num_error % ('confs_per_magn', 'positive integer')
623
    confs_per_magn = try_command(dos_inp.getint, [(ValueError, err_msg)],
624
                                 'Screening', 'confs_per_magn', fallback=2)
625
    if confs_per_magn <= 0:
626
        logger.error(err_msg)
627
        raise ValueError(err_msg)
628
    return confs_per_magn
629

    
630

    
631
def get_surf_norm_vect():
632
    err = "'surf_norm_vect' must be a 3 component vector, 'x', 'y' or 'z', " \
633
          "'auto' or 'asann'."
634
    cart_axes = {'x': [1.0, 0.0, 0.0], '-x': [-1.0, 0.0, 0.0],
635
                 'y': [0.0, 1.0, 0.0], '-y': [0.0, -1.0, 0.0],
636
                 'z': [0.0, 0.0, 1.0], '-z': [0.0, 0.0, -1.0]}
637
    surf_norm_vect_str = dos_inp.get('Screening', 'surf_norm_vect',
638
                                     fallback="auto").lower()
639
    if surf_norm_vect_str == "asann" or surf_norm_vect_str == "auto":
640
        return 'auto'
641
    if surf_norm_vect_str in cart_axes:
642
        return np.array(cart_axes[surf_norm_vect_str])
643
    surf_norm_vect = try_command(str2lst, [(ValueError, err)],
644
                                 surf_norm_vect_str, float)
645
    if len(surf_norm_vect) != 3:
646
        logger.error(err)
647
        raise ValueError(err)
648

    
649
    return np.array(surf_norm_vect) / np.linalg.norm(surf_norm_vect)
650

    
651

    
652
def get_adsorption_height():
653
    err_msg = num_error % ('adsorption_height', 'positive number')
654
    ads_height = try_command(dos_inp.getfloat, [(ValueError, err_msg)],
655
                             'Screening', 'adsorption_height', fallback=2.5)
656
    if ads_height <= 0:
657
        logger.error(err_msg)
658
        raise ValueError(err_msg)
659
    return ads_height
660

    
661

    
662
def get_set_angles():
663
    set_vals = ['euler', 'internal']
664
    check_expect_val(dos_inp.get('Screening', 'set_angles').lower(), set_vals)
665
    set_angles = dos_inp.get('Screening', 'set_angles',
666
                             fallback='euler').lower()
667
    return set_angles
668

    
669

    
670
def get_pts_per_angle():
671
    err_msg = num_error % ('sample_points_per_angle', 'positive integer')
672
    pts_per_angle = try_command(dos_inp.getint,
673
                                [(ValueError, err_msg)],
674
                                'Screening', 'sample_points_per_angle',
675
                                fallback=3)
676
    if pts_per_angle <= 0:
677
        logger.error(err_msg)
678
        raise ValueError(err_msg)
679
    return pts_per_angle
680

    
681

    
682
def get_max_structures():
683
    err_msg = num_error % ('max_structures', 'positive integer')
684
    max_structs = dos_inp.get('Screening', 'max_structures', fallback="False")
685
    if max_structs.lower() in turn_false_answers:
686
        return np.inf
687
    if try_command(int, [(ValueError, err_msg)], max_structs) <= 0:
688
        logger.error(err_msg)
689
        raise ValueError(err_msg)
690
    return int(max_structs)
691

    
692

    
693
def get_coll_thrsld():
694
    err_msg = num_error % ('collision_threshold', 'positive number')
695
    coll_thrsld_str = dos_inp.get('Screening', 'collision_threshold',
696
                                  fallback="False")
697
    if coll_thrsld_str.lower() in turn_false_answers:
698
        return False
699
    coll_thrsld = try_command(float, [(ValueError, err_msg)], coll_thrsld_str)
700

    
701
    if coll_thrsld <= 0:
702
        logger.error(err_msg)
703
        raise ValueError(err_msg)
704

    
705
    return coll_thrsld
706

    
707

    
708
def get_min_coll_height(norm_vect):
709
    err_msg = num_error % ('min_coll_height', 'decimal number')
710
    min_coll_height = dos_inp.get('Screening', 'min_coll_height',
711
                                  fallback="False")
712
    if min_coll_height.lower() in turn_false_answers:
713
        return False
714
    min_coll_height = try_command(float, [(ValueError, err_msg)],
715
                                  min_coll_height)
716
    cart_axes = [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0],
717
                 [-1.0, 0.0, 0.0], [0.0, -1.0, 0.0], [0.0, 0.0, -1.0]]
718
    err_msg = "'min_coll_height' option is only implemented for " \
719
              "'surf_norm_vect' to be one of the x, y or z axes. "
720
    if not isinstance(norm_vect, str) or norm_vect != 'auto':
721
        check_expect_val(norm_vect.tolist(), cart_axes, err_msg)
722
    return min_coll_height
723

    
724

    
725
def get_exclude_ads_ctr():
726
    err_msg = "exclude_ads_ctr must have a boolean value."
727
    exclude_ads_ctr = try_command(dos_inp.getboolean, [(ValueError, err_msg)],
728
                                  "Screening", "exclude_ads_ctr",
729
                                  fallback=False)
730
    return exclude_ads_ctr
731

    
732

    
733
def get_H_donor(spec_atoms):
734
    from ase.data import chemical_symbols
735
    err_msg = "The value of 'h_donor' must be either False, a chemical symbol " \
736
              "or an atom index"
737
    h_donor_str = dos_inp.get('Screening', 'h_donor', fallback="False")
738
    h_donor = []
739
    if h_donor_str.lower() in turn_false_answers:
740
        return False
741
    err = False
742
    for el in h_donor_str.split():
743
        try:
744
            h_donor.append(int(el))
745
        except ValueError:
746
            if el not in chemical_symbols + [nw_sym for pairs in spec_atoms
747
                                             for nw_sym in pairs]:
748
                err = True
749
            else:
750
                h_donor.append(el)
751
        finally:
752
            if err:
753
                logger.error(err_msg)
754
                ValueError(err_msg)
755
    return h_donor
756

    
757

    
758
def get_H_acceptor(spec_atoms):
759
    from ase.data import chemical_symbols
760
    err_msg = "The value of 'h_acceptor' must be either 'all', a chemical " \
761
              "symbol or an atom index."
762
    h_acceptor_str = dos_inp.get('Screening', 'h_acceptor', fallback="all")
763
    if h_acceptor_str.lower() == "all":
764
        return "all"
765
    h_acceptor = []
766
    err = False
767
    for el in h_acceptor_str.split():
768
        try:
769
            h_acceptor.append(int(el))
770
        except ValueError:
771
            if el not in chemical_symbols + [nw_sym for pairs in spec_atoms
772
                                             for nw_sym in pairs]:
773
                err = True
774
            else:
775
                h_acceptor.append(el)
776
        finally:
777
            if err:
778
                logger.error(err_msg)
779
                raise ValueError(err_msg)
780
    return h_acceptor
781

    
782

    
783
def get_use_molec_file():
784
    use_molec_file = dos_inp.get('Screening', 'use_molec_file',
785
                                 fallback='False')
786
    if use_molec_file.lower() in turn_false_answers:
787
        return False
788
    if not os.path.isfile(use_molec_file):
789
        logger.error(f'File {use_molec_file} not found.')
790
        raise FileNotFoundError(f'File {use_molec_file} not found')
791

    
792
    return use_molec_file
793

    
794

    
795
# Refinement
796

    
797
def get_refine_inp_file(code, potcar_dir=None):
798
    inp_file_lst = dos_inp.get('Refinement', 'refine_inp_file').split()
799
    check_inp_files(inp_file_lst[0] if len(inp_file_lst) == 1 else inp_file_lst,
800
                    code, potcar_dir)
801
    return inp_file_lst[0] if len(inp_file_lst) == 1 else inp_file_lst
802

    
803

    
804
def get_energy_cutoff():
805
    err_msg = num_error % ('energy_cutoff', 'positive decimal number')
806
    energy_cutoff = try_command(dos_inp.getfloat,
807
                                [(ValueError, err_msg)],
808
                                'Refinement', 'energy_cutoff', fallback=0.5)
809
    if energy_cutoff < 0:
810
        logger.error(err_msg)
811
        raise ValueError(err_msg)
812
    return energy_cutoff
813

    
814

    
815
# Read input parameters
816

    
817
def read_input(in_file):
818
    from modules.formats import adapt_format
819

    
820
    err_msg = False
821
    try:
822
        dos_inp.read(in_file)
823
    except MissingSectionHeaderError as e:
824
        logger.error('There are options in the input file without a Section '
825
                     'header.')
826
        err_msg = e
827
    except DuplicateOptionError as e:
828
        logger.error('There is an option in the input file that has been '
829
                     'specified more than once.')
830
        err_msg = e
831
    except Exception as e:
832
        err_msg = e
833
    else:
834
        err_msg = False
835
    finally:
836
        if isinstance(err_msg, BaseException):
837
            raise err_msg
838

    
839
    inp_vars = {}
840

    
841
    # Global
842
    if not dos_inp.has_section('Global'):
843
        logger.error(no_sect_err % 'Global')
844
        raise NoSectionError('Global')
845

    
846
    # Mandatory options
847
    # Checks whether the mandatory options 'run_type', 'code', etc. are present.
848
    glob_mand_opts = ['run_type', 'code', 'batch_q_sys']
849
    for opt in glob_mand_opts:
850
        if not dos_inp.has_option('Global', opt):
851
            logger.error(no_opt_err % (opt, 'Global'))
852
            raise NoOptionError(opt, 'Global')
853

    
854
    # Gets which sections are to be carried out
855
    isolated, screening, refinement = get_run_type()
856
    inp_vars['isolated'] = isolated
857
    inp_vars['screening'] = screening
858
    inp_vars['refinement'] = refinement
859
    inp_vars['code'] = get_code()
860
    inp_vars['batch_q_sys'] = get_batch_q_sys()
861

    
862
    # Dependent options:
863
    if inp_vars['batch_q_sys']:
864
        inp_vars['max_jobs'] = get_max_jobs()
865
        if inp_vars['batch_q_sys'] != 'local':
866
            if not dos_inp.has_option('Global', 'subm_script'):
867
                logger.error(no_opt_err % ('subm_script', 'Global'))
868
                raise NoOptionError('subm_script', 'Global')
869
            inp_vars['subm_script'] = get_subm_script()
870
    if inp_vars['code'] == "vasp":
871
        inp_vars['potcar_dir'] = get_potcar_dir()
872

    
873
    # Facultative options (Default/Fallback value present)
874
    inp_vars['pbc_cell'] = get_pbc_cell()
875
    inp_vars['project_name'] = get_project_name()
876
    # inp_vars['relaunch_err'] = get_relaunch_err()
877
    inp_vars['special_atoms'] = get_special_atoms()
878

    
879
    # Isolated
880
    if isolated:
881
        if not dos_inp.has_section('Isolated'):
882
            logger.error(no_sect_err % 'Isolated')
883
            raise NoSectionError('Isolated')
884
        # Mandatory options
885
        # Checks whether the mandatory options are present.
886
        iso_mand_opts = ['isol_inp_file', 'molec_file']
887
        for opt in iso_mand_opts:
888
            if not dos_inp.has_option('Isolated', opt):
889
                logger.error(no_opt_err % (opt, 'Isolated'))
890
                raise NoOptionError(opt, 'Isolated')
891
        if 'potcar_dir' in inp_vars:
892
            inp_vars['isol_inp_file'] = get_isol_inp_file(inp_vars['code'],
893
                                                          inp_vars[
894
                                                              'potcar_dir'])
895
        else:
896
            inp_vars['isol_inp_file'] = get_isol_inp_file(inp_vars['code'])
897
        inp_vars['molec_file'] = get_molec_file()
898

    
899
        # Checks for PBC
900
        atms = adapt_format('ase', inp_vars['molec_file'],
901
                            inp_vars['special_atoms'])
902
        if inp_vars['code'] == 'vasp' and np.linalg.det(atms.cell) == 0.0 \
903
                and inp_vars['pbc_cell'] is False:
904
            err_msg = "When running calculations with 'VASP', the PBC cell " \
905
                      "should be provided either implicitely inside " \
906
                      "'molec_file' or by setting the 'pbc_cell' option."
907
            logger.error(err_msg)
908
            raise ValueError(err_msg)
909
        elif inp_vars['pbc_cell'] is False and np.linalg.det(atms.cell) != 0.0:
910
            inp_vars['pbc_cell'] = atms.cell
911
            logger.info(f"Obtained pbc_cell from '{inp_vars['molec_file']}' "
912
                        f"file.")
913
        elif (atms.cell != 0).any() and not np.allclose(inp_vars['pbc_cell'],
914
                                                        atms.cell):
915
            logger.warning("'molec_file' has an implicit cell defined "
916
                           f"different than 'pbc_cell'.\n"
917
                           f"'molec_file' = {atms.cell}.\n"
918
                           f"'pbc_cell' = {inp_vars['pbc_cell']}).\n"
919
                           "'pbc_cell' value will be used.")
920

    
921
        # Facultative options (Default/Fallback value present)
922
        inp_vars['num_conformers'] = get_num_conformers()
923
        inp_vars['pre_opt'] = get_pre_opt()
924

    
925
    # Screening
926
    if screening:
927
        if not dos_inp.has_section('Screening'):
928
            logger.error(no_sect_err % 'Screening')
929
            raise NoSectionError('Screening')
930
        # Mandatory options:
931
        # Checks whether the mandatory options are present.
932
        screen_mand_opts = ['screen_inp_file', 'surf_file', 'sites',
933
                            'molec_ctrs']
934
        for opt in screen_mand_opts:
935
            if not dos_inp.has_option('Screening', opt):
936
                logger.error(no_opt_err % (opt, 'Screening'))
937
                raise NoOptionError(opt, 'Screening')
938
        if 'potcar_dir' in inp_vars:
939
            inp_vars['screen_inp_file'] = get_screen_inp_file(inp_vars['code'],
940
                                                              inp_vars[
941
                                                                  'potcar_dir'])
942
        else:
943
            inp_vars['screen_inp_file'] = get_screen_inp_file(inp_vars['code'])
944
        inp_vars['surf_file'] = get_surf_file()
945
        inp_vars['sites'] = get_sites()
946
        inp_vars['molec_ctrs'] = get_molec_ctrs()
947

    
948
        # Checks for PBC
949
        # Checks for PBC
950
        atms = adapt_format('ase', inp_vars['surf_file'],
951
                            inp_vars['special_atoms'])
952
        if inp_vars['code'] == 'vasp' and np.linalg.det(atms.cell) == 0.0 \
953
                and inp_vars['pbc_cell'] is False:
954
            err_msg = "When running calculations with 'VASP', the PBC cell " \
955
                      "should be provided either implicitely inside " \
956
                      "'surf_file' or by setting the 'pbc_cell' option."
957
            logger.error(err_msg)
958
            raise ValueError(err_msg)
959
        elif inp_vars['pbc_cell'] is False and np.linalg.det(atms.cell) != 0.0:
960
            inp_vars['pbc_cell'] = atms.cell
961
            logger.info(f"Obtained pbc_cell from '{inp_vars['surf_file']}' "
962
                        f"file.")
963
        elif (atms.cell != 0).any() and not np.allclose(inp_vars['pbc_cell'],
964
                                                        atms.cell):
965
            logger.warning("'surf_file' has an implicit cell defined "
966
                           f"different than 'pbc_cell'.\n"
967
                           f"'surf_file' = {atms.cell}.\n"
968
                           f"'pbc_cell' = {inp_vars['pbc_cell']}).\n"
969
                           "'pbc_cell' value will be used.")
970

    
971
        # Facultative options (Default value present)
972
        inp_vars['select_magns'] = get_select_magns()
973
        inp_vars['confs_per_magn'] = get_confs_per_magn()
974
        inp_vars['surf_norm_vect'] = get_surf_norm_vect()
975
        inp_vars['adsorption_height'] = get_adsorption_height()
976
        inp_vars['set_angles'] = get_set_angles()
977
        inp_vars['sample_points_per_angle'] = get_pts_per_angle()
978
        inp_vars['collision_threshold'] = get_coll_thrsld()
979
        inp_vars['min_coll_height'] = get_min_coll_height(
980
            inp_vars['surf_norm_vect'])
981
        if inp_vars['min_coll_height'] is False \
982
                and inp_vars['collision_threshold'] is False:
983
            logger.warning("Collisions are deactivated: Overlapping of "
984
                           "adsorbate and surface is possible")
985
        inp_vars['exclude_ads_ctr'] = get_exclude_ads_ctr()
986
        inp_vars['h_donor'] = get_H_donor(inp_vars['special_atoms'])
987
        inp_vars['max_structures'] = get_max_structures()
988
        inp_vars['use_molec_file'] = get_use_molec_file()
989

    
990
        # Options depending on the value of others
991
        if inp_vars['set_angles'] == "internal":
992
            internal_opts = ['molec_ctrs2', 'molec_ctrs3', 'surf_ctrs2',
993
                             'max_helic_angle']
994
            for opt in internal_opts:
995
                if not dos_inp.has_option('Screening', opt):
996
                    logger.error(no_opt_err % (opt, 'Screening'))
997
                    raise NoOptionError(opt, 'Screening')
998
            inp_vars['max_helic_angle'] = get_max_helic_angle()
999
            inp_vars['molec_ctrs2'] = get_molec_ctrs2()
1000
            inp_vars['molec_ctrs3'] = get_molec_ctrs3()
1001
            inp_vars['surf_ctrs2'] = get_surf_ctrs2()
1002
            if len(inp_vars["molec_ctrs2"]) != len(inp_vars['molec_ctrs']) \
1003
                    or len(inp_vars["molec_ctrs3"]) != \
1004
                    len(inp_vars['molec_ctrs']) \
1005
                    or len(inp_vars['surf_ctrs2']) != len(inp_vars['sites']):
1006
                err_msg = "'molec_ctrs' 'molec_ctrs2' and 'molec_ctrs3' must " \
1007
                          "have the same number of indices "
1008
                logger.error(err_msg)
1009
                raise ValueError(err_msg)
1010

    
1011
        if inp_vars['h_donor'] is False:
1012
            inp_vars['h_acceptor'] = False
1013
        else:
1014
            inp_vars['h_acceptor'] = get_H_acceptor(inp_vars['special_atoms'])
1015

    
1016
    # Refinement
1017
    if refinement:
1018
        if not dos_inp.has_section('Refinement'):
1019
            logger.error(no_sect_err % 'Refinement')
1020
            raise NoSectionError('Refinement')
1021
        # Mandatory options
1022
        # Checks whether the mandatory options are present.
1023
        ref_mand_opts = ['refine_inp_file']
1024
        for opt in ref_mand_opts:
1025
            if not dos_inp.has_option('Refinement', opt):
1026
                logger.error(no_opt_err % (opt, 'Refinement'))
1027
                raise NoOptionError(opt, 'Refinement')
1028
        if 'potcar_dir' in inp_vars:
1029
            inp_vars['refine_inp_file'] = get_refine_inp_file(inp_vars['code'],
1030
                                                              inp_vars[
1031
                                                                  'potcar_dir'])
1032
        else:
1033
            inp_vars['refine_inp_file'] = get_refine_inp_file(inp_vars['code'])
1034

    
1035
        # Facultative options (Default value present)
1036
        inp_vars['energy_cutoff'] = get_energy_cutoff()
1037
        # end energy_cutoff
1038

    
1039
    return_vars_str = "\n\t".join([str(key) + ": " + str(value)
1040
                                   for key, value in inp_vars.items()])
1041
    logger.info(f'Correctly read {in_file} parameters:'
1042
                f' \n\n\t{return_vars_str}\n')
1043

    
1044
    return inp_vars
1045

    
1046

    
1047
if __name__ == "__main__":
1048
    import sys
1049

    
1050
    print(read_input(sys.argv[1]))