Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / dos_input.py @ ffa1b366

Historique | Voir | Annoter | Télécharger (39,38 ko)

1
"""Functions to deal with DockOnSurf input files.
2

3
Functions
4
try_command:Tries to run a command and logs its exceptions (expected and not).
5
str2lst: Converts a string of integers, and groups of them, to a list of lists.
6
check_expect_val: Checks whether the value of an option has an adequate value.
7
read_input: Sets up the calculation by reading the parameters from input file.
8
get_run_type: Gets 'run_type' value and checks that its value is acceptable.
9
get_code: Gets 'code' value and checks that its value is acceptable.
10
get_batch_q_sys: Gets 'batch_q_sys' value and checks that its value is
11
acceptable.
12
get_relaunch_err: Gets 'relaunch_err' value and checks that its value is
13
acceptable.
14
get_max_jobs: Gets 'max_jobs' value and checks that its value is acceptable.
15
get_special_atoms: Gets 'special_atoms' value and checks that its value is
16
acceptable.
17
get_isol_inp_file: Gets 'isol_inp_file' value and checks that its value is
18
acceptable.
19
get_cluster_magns: Gets 'cluster_magns' value and checks that its value is
20
acceptable.
21
get_num_conformers: Gets 'num_conformers' value and checks that its value is
22
acceptable.
23
get_num_prom_cand: Gets 'num_prom_cand' value and checks that its value is
24
acceptable.
25
get_iso_rmsd: Gets 'iso_rmsd' value and checks that its value is acceptable.
26
get_pre_opt: Gets 'pre_opt' value and checks that its value is acceptable.
27
get_screen_inp_file: Gets 'screen_inp_file' value and checks that its value is
28
acceptable.
29
get_sites: Gets 'sites' value and checks that its value is acceptable.
30
get_molec_ctrs: Gets 'molec_ctrs' value and checks that its value is
31
acceptable.
32
get_try_disso: Gets 'try_disso' value and checks that its value is acceptable.
33
get_pts_per_angle: Gets 'pts_per_angle' value and checks that its value is
34
acceptable.
35
get_coll_thrsld: Gets 'coll_thrsld' value and checks that its value is
36
acceptable.
37
get_screen_rmsd: Gets 'screen_rmsd' value and checks that its value is
38
acceptable.
39
get_coll_bottom_z: Gets 'coll_bottom_z' value and checks that its value is
40
acceptable.
41
get_refine_inp_file: Gets 'refine_inp_file' value and checks that its value is
42
acceptable.
43
get_energy_cutoff: Gets 'energy_cutoff' value and checks that its value is
44
acceptable.
45
"""
46
import os.path
47
import logging
48
from configparser import ConfigParser, NoSectionError, NoOptionError, \
49
    MissingSectionHeaderError, DuplicateOptionError
50
import numpy as np
51
from modules.utilities import try_command
52

    
53
logger = logging.getLogger('DockOnSurf')
54

    
55
dos_inp = ConfigParser(inline_comment_prefixes='#',
56
                       empty_lines_in_values=False)
57

    
58
new_answers = {'n': False, 'none': False, 'nay': False,
59
               'y': True, '': True, 'aye': True, 'sure': True}
60
for answer, val in new_answers.items():
61
    dos_inp.BOOLEAN_STATES[answer] = val  # TODO Check value 0
62
turn_false_answers = [answer for answer in dos_inp.BOOLEAN_STATES
63
                      if dos_inp.BOOLEAN_STATES[answer] is False]
64
turn_true_answers = [answer for answer in dos_inp.BOOLEAN_STATES
65
                     if dos_inp.BOOLEAN_STATES[answer]]
66

    
67
no_sect_err = "Section '%s' not found on input file"
68
no_opt_err = "Option '%s' not found on section '%s'"
69
num_error = "'%s' value must be a %s"
70

    
71

    
72
# Auxilary functions
73

    
74
def str2lst(cmplx_str, func=int):  # TODO: enable deeper level of nested lists
75
    # TODO Treat all-enclosing parenthesis as a list instead of list of lists.
76
    """Converts a string of integers, and groups of them, to a list.
77

78
    Keyword arguments:
79
    @param cmplx_str: str, string of integers (or floats) and groups of them
80
    enclosed by parentheses-like characters.
81
    - Group enclosers: '()' '[]' and '{}'.
82
    - Separators: ',' ';' and ' '.
83
    - Nested groups are not allowed: '3 ((6 7) 8) 4'.
84
    @param func: either to use int or float
85

86
    @return list, list of integers (or floats), or list of integers (or floats)
87
    in the case they were grouped. First, the singlets are placed, and then the
88
    groups in input order.
89

90
    eg. '128,(135 138;141] 87 {45, 68}' -> [128, 87, [135, 138, 141], [45, 68]]
91
    """
92

    
93
    # Checks
94
    error_msg = "Function argument should be a str, sequence of " \
95
                "numbers separated by ',' ';' or ' '." \
96
                "\nThey can be grouped in parentheses-like enclosers: '()', " \
97
                "'[]' or {}. Nested groups are not allowed. \n" \
98
                "eg. 128,(135 138;141) 87 {45, 68}"
99
    cmplx_str = try_command(cmplx_str.replace, [(AttributeError, error_msg)],
100
                            ',', ' ')
101

    
102
    cmplx_str = cmplx_str.replace(';', ' ').replace('[', '(').replace(
103
        ']', ')').replace('{', '(').replace('}', ')')
104

    
105
    try_command(list, [(ValueError, error_msg)], map(func, cmplx_str.replace(
106
        ')', '').replace('(', '').split()))
107

    
108
    deepness = 0
109
    for el in cmplx_str.split():
110
        if '(' in el:
111
            deepness += 1
112
        if ')' in el:
113
            deepness += -1
114
        if deepness > 1 or deepness < 0:
115
            logger.error(error_msg)
116
            raise ValueError(error_msg)
117

    
118
    init_list = cmplx_str.split()
119
    start_group = []
120
    end_group = []
121
    for i, element in enumerate(init_list):
122
        if '(' in element:
123
            start_group.append(i)
124
            init_list[i] = element.replace('(', '')
125
        if ')' in element:
126
            end_group.append(i)
127
            init_list[i] = element.replace(')', '')
128

    
129
    init_list = list(map(func, init_list))
130

    
131
    new_list = []
132
    for start_el, end_el in zip(start_group, end_group):
133
        new_list.append(init_list[start_el:end_el + 1])
134

    
135
    for v in new_list:
136
        for el in v:
137
            init_list.remove(el)
138
    return init_list + new_list
139

    
140

    
141
def check_expect_val(value, expect_vals, err_msg=None):
142
    """Checks whether an option lies within its expected values.
143

144
    Keyword arguments:
145
    @param value: The variable to check if its value lies within the expected
146
    ones
147
    @param expect_vals: list, list of values allowed for the present option.
148
    @param err_msg: The error message to be prompted in both log and screen.
149
    @raise ValueError: if the value is not among the expected ones.
150
    @return True if the value is among the expected ones.
151
    """
152
    if err_msg is None:
153
        err_msg = f"'{value}' is not an adequate value.\n" \
154
                  f"Adequate values: {expect_vals}"
155
    if not any([exp_val == value for exp_val in expect_vals]):
156
        logger.error(err_msg)
157
        raise ValueError(err_msg)
158

    
159
    return True
160

    
161

    
162
def check_inp_file(inp_files, code):
163
    if code == 'cp2k':
164
        from pycp2k import CP2K
165
        if not isinstance(inp_files, str):
166
            err_msg = "When using CP2K, only one input file is allowed"
167
            logger.error(err_msg)
168
            ValueError(err_msg)
169
        elif not os.path.isfile(inp_files):
170
            err_msg = f"Input file {inp_files} was not found."
171
            logger.error(err_msg)
172
            raise FileNotFoundError(err_msg)
173
        cp2k = CP2K()
174
        try_command(cp2k.parse,
175
                    [(UnboundLocalError, "Invalid CP2K input file")], inp_files)
176
    elif code == "vasp":
177
        mand_files = ["INCAR", "KPOINTS", "POTCAR"]
178
        # Check that it inp_files is a list of file paths
179
        if not isinstance(inp_files, list) and all(isinstance(inp_file, str)
180
                                                   for inp_file in inp_files):
181
            err_msg = "'inp_files' should be a list of file names/paths"
182
            logger.error(err_msg)
183
            ValueError(err_msg)
184
        # Check that all mandatory files are defined once and just once.
185
        elif [[mand_file in inp_file for inp_file in inp_files].count(True)
186
              for mand_file in mand_files].count(1) != len(mand_files):
187
            err_msg = f"Each of the mandatory files {mand_files} must be " \
188
                      f"defined once and just once."
189
            logger.error(err_msg)
190
            raise FileNotFoundError(err_msg)
191
        # Check that the defined files exist
192
        elif any(not os.path.isfile(inp_file) for inp_file in inp_files):
193
            err_msg = f"At least one of the mandatory files {mand_files} was " \
194
                      "not found."
195
            logger.error(err_msg)
196
            raise FileNotFoundError(err_msg)
197
        # Check that mandatory files are actual vasp files.
198
        else:
199
            from pymatgen.io.vasp.inputs import Incar, Kpoints, Potcar
200
            for inp_file in inp_files:
201
                file_name = inp_file.split("/")[-1]
202
                if not any(mand_file in file_name for mand_file in mand_files):
203
                    continue
204
                file_type = ""
205
                for mand_file in mand_files:
206
                    if mand_file in inp_file:
207
                        file_type = mand_file
208
                err = False
209
                err_msg = f"'{inp_file}' is not a valid {file_name} file."
210
                try:
211
                    eval(file_type.capitalize()).from_file(inp_file)
212
                except ValueError:
213
                    logger.error(err_msg)
214
                    err = ValueError(err_msg)
215
                except IndexError:
216
                    logger.error(err_msg)
217
                    err = IndexError(err_msg)
218
                else:
219
                    if file_name == "INCAR":
220
                        Incar.from_file("INCAR").check_params()
221
                finally:
222
                    if isinstance(err, BaseException):
223
                        raise err
224

    
225

    
226
# Global
227

    
228
def get_run_type():
229
    isolated, screening, refinement = (False, False, False)
230
    run_type_vals = ['isolated', 'screening', 'refinement', 'adsorption',
231
                     'full']
232
    run_types = dos_inp.get('Global', 'run_type').split()
233
    for run_type in run_types:
234
        check_expect_val(run_type.lower(), run_type_vals)
235
        if 'isol' in run_type.lower():
236
            isolated = True
237
        if 'screen' in run_type.lower():
238
            screening = True
239
        if 'refine' in run_type.lower():
240
            refinement = True
241
        if 'adsor' in run_type.lower():
242
            screening, refinement = (True, True)
243
        if 'full' in run_type.lower():
244
            isolated, screening, refinement = (True, True, True)
245

    
246
    return isolated, screening, refinement
247

    
248

    
249
def get_code():
250
    code_vals = ['cp2k', 'vasp']
251
    check_expect_val(dos_inp.get('Global', 'code').lower(), code_vals)
252
    code = dos_inp.get('Global', 'code').lower()
253
    return code
254

    
255

    
256
def get_batch_q_sys():
257
    batch_q_sys_vals = ['sge', 'lsf', 'irene', 'local'] + turn_false_answers
258
    check_expect_val(dos_inp.get('Global', 'batch_q_sys').lower(),
259
                     batch_q_sys_vals)
260
    batch_q_sys = dos_inp.get('Global', 'batch_q_sys').lower()
261
    if batch_q_sys.lower() in turn_false_answers:
262
        return False
263
    else:
264
        return batch_q_sys
265

    
266

    
267
def get_pbc_cell():
268
    from ase.atoms import Cell
269
    err_msg = "'pbc_cell' must be either 3 vectors of size 3 or False."
270
    pbc_cell_str = dos_inp.get('Global', 'pbc_cell', fallback="False")
271
    if pbc_cell_str.lower() in turn_false_answers:
272
        return False
273
    else:
274
        pbc_cell = np.array(try_command(str2lst, [(ValueError, err_msg)],
275
                                        pbc_cell_str, float))
276
        if pbc_cell.shape != (3, 3):
277
            logger.error(err_msg)
278
            raise ValueError(err_msg)
279
        if np.linalg.det(pbc_cell) == 0.0:
280
            err_msg = "The volume of the defined cell is 0"
281
            logger.error(err_msg)
282
            raise ValueError(err_msg)
283
        return Cell(pbc_cell)
284

    
285

    
286
def get_subm_script():
287
    subm_script = dos_inp.get('Global', 'subm_script')
288
    if not os.path.isfile(subm_script):
289
        logger.error(f'File {subm_script} not found.')
290
        raise FileNotFoundError(f'File {subm_script} not found')
291
    return subm_script
292

    
293

    
294
def get_project_name():
295
    project_name = dos_inp.get('Global', 'project_name', fallback='')
296
    return project_name
297

    
298

    
299
def get_relaunch_err():
300
    relaunch_err_vals = ['geo_not_conv']
301
    relaunch_err = dos_inp.get('Global', 'relaunch_err',
302
                               fallback="False")
303
    if relaunch_err.lower() in turn_false_answers:
304
        return False
305
    else:
306
        check_expect_val(relaunch_err.lower(), relaunch_err_vals)
307
    return relaunch_err
308

    
309

    
310
def get_max_jobs():
311
    import re
312
    err_msg = "'max_jobs' must be a list of, number plus 'p', 'q' or 'r', or " \
313
              "a combination of them without repeating letters.\n" \
314
              "eg: '2r 3p 4pr', '5q' or '3r 3p'"
315
    max_jobs_str = dos_inp.get('Global', 'max_jobs', fallback="inf").lower()
316
    str_vals = ["r", "p", "q", "rp", "rq", "pr", "qr"]
317
    max_jobs = {"r": np.inf, "p": np.inf, "rp": np.inf}
318
    if "inf" == max_jobs_str:
319
        return {"r": np.inf, "p": np.inf, "rp": np.inf}
320
    # Iterate over the number of requirements:
321
    for req in max_jobs_str.split():
322
        # Split numbers from letters into a list
323
        req_parts = re.findall(r'[a-z]+|\d+', req)
324
        if len(req_parts) != 2:
325
            logger.error(err_msg)
326
            raise ValueError(err_msg)
327
        if req_parts[0].isdecimal():
328
            req_parts[1] = req_parts[1].replace('q', 'p').replace('pr', 'rp')
329
            if req_parts[1] in str_vals and max_jobs[req_parts[1]] == np.inf:
330
                max_jobs[req_parts[1]] = int(req_parts[0])
331
        elif req_parts[1].isdecimal():
332
            req_parts[0] = req_parts[0].replace('q', 'p').replace('pr', 'rp')
333
            if req_parts[0] in str_vals and max_jobs[req_parts[0]] == np.inf:
334
                max_jobs[req_parts[0]] = int(req_parts[1])
335
        else:
336
            logger.error(err_msg)
337
            raise ValueError(err_msg)
338

    
339
    return max_jobs
340

    
341

    
342
def get_special_atoms():
343
    from ase.data import chemical_symbols
344

    
345
    spec_at_err = '\'special_atoms\' does not have an adequate format.\n' \
346
                  'Adequate format: (Fe1 Fe) (O1 O)'
347
    special_atoms = dos_inp.get('Global', 'special_atoms', fallback="False")
348
    if special_atoms.lower() in turn_false_answers:
349
        special_atoms = []
350
    else:
351
        # Converts the string into a list of tuples
352
        lst_tple = [tuple(pair.replace("(", "").split()) for pair in
353
                    special_atoms.split(")")[:-1]]
354
        if len(lst_tple) == 0:
355
            logger.error(spec_at_err)
356
            raise ValueError(spec_at_err)
357
        for i, tup in enumerate(lst_tple):
358
            if not isinstance(tup, tuple) or len(tup) != 2:
359
                logger.error(spec_at_err)
360
                raise ValueError(spec_at_err)
361
            if tup[1].capitalize() not in chemical_symbols:
362
                elem_err = "The second element of the couple should be an " \
363
                           "actual element of the periodic table"
364
                logger.error(elem_err)
365
                raise ValueError(elem_err)
366
            if tup[0].capitalize() in chemical_symbols:
367
                elem_err = "The first element of the couple is already an " \
368
                           "actual element of the periodic table, "
369
                logger.error(elem_err)
370
                raise ValueError(elem_err)
371
            for j, tup2 in enumerate(lst_tple):
372
                if j <= i:
373
                    continue
374
                if tup2[0] == tup[0]:
375
                    label_err = f'You have specified the label {tup[0]} to ' \
376
                                f'more than one special atom'
377
                    logger.error(label_err)
378
                    raise ValueError(label_err)
379
        special_atoms = lst_tple
380
    return special_atoms
381

    
382

    
383
# Isolated
384

    
385
def get_isol_inp_file(code):  # TODO allow spaces in path names
386
    inp_file_lst = dos_inp.get('Isolated', 'isol_inp_file').split()
387
    check_inp_file(inp_file_lst[0] if len(inp_file_lst) == 1 else inp_file_lst,
388
                   code)
389
    return inp_file_lst[0] if len(inp_file_lst) == 1 else inp_file_lst
390

    
391

    
392
def get_molec_file():
393
    molec_file = dos_inp.get('Isolated', 'molec_file')
394
    if not os.path.isfile(molec_file):
395
        logger.error(f'File {molec_file} not found.')
396
        raise FileNotFoundError(f'File {molec_file} not found')
397
    return molec_file
398

    
399

    
400
def get_num_conformers():
401
    err_msg = num_error % ('num_conformers', 'positive integer')
402
    num_conformers = try_command(dos_inp.getint, [(ValueError, err_msg)],
403
                                 'Isolated', 'num_conformers', fallback=100)
404
    if num_conformers < 1:
405
        logger.error(err_msg)
406
        raise ValueError(err_msg)
407
    return num_conformers
408

    
409

    
410
def get_pre_opt():
411
    pre_opt_vals = ['uff', 'mmff'] + turn_false_answers
412
    check_expect_val(dos_inp.get('Isolated', 'pre_opt').lower(), pre_opt_vals)
413
    pre_opt = dos_inp.get('Isolated', 'pre_opt').lower()
414
    if pre_opt in turn_false_answers:
415
        return False
416
    else:
417
        return pre_opt
418

    
419

    
420
# Screening
421

    
422
def get_screen_inp_file(code):  # TODO allow spaces in path names
423
    inp_file_lst = dos_inp.get('Screening', 'screen_inp_file').split()
424
    check_inp_file(inp_file_lst[0] if len(inp_file_lst) == 1 else inp_file_lst,
425
                   code)
426
    return inp_file_lst[0] if len(inp_file_lst) == 1 else inp_file_lst
427

    
428

    
429
def get_surf_file():
430
    surf_file = dos_inp.get('Screening', 'surf_file')
431
    if not os.path.isfile(surf_file):
432
        logger.error(f'File {surf_file} not found.')
433
        raise FileNotFoundError(f'File {surf_file} not found')
434
    return surf_file
435

    
436

    
437
def get_sites():
438
    err_msg = 'The value of sites should be a list of atom numbers ' \
439
              '(ie. positive integers) or groups of atom numbers ' \
440
              'grouped by parentheses-like enclosers. \n' \
441
              'eg. 128,(135 138;141) 87 {45, 68}'
442
    # Convert the string into a list of lists
443
    sites = try_command(str2lst,
444
                        [(ValueError, err_msg), (AttributeError, err_msg)],
445
                        dos_inp.get('Screening', 'sites'))
446
    # Check all elements of the list (of lists) are positive integers
447
    for site in sites:
448
        if type(site) is list:
449
            for atom in site:
450
                if atom < 0:
451
                    logger.error(err_msg)
452
                    raise ValueError(err_msg)
453
        elif type(site) is int:
454
            if site < 0:
455
                logger.error(err_msg)
456
                raise ValueError(err_msg)
457
        else:
458
            logger.error(err_msg)
459
            raise ValueError(err_msg)
460

    
461
    return sites
462

    
463

    
464
def get_surf_ctrs2():
465
    err_msg = 'The value of surf_ctrs2 should be a list of atom numbers ' \
466
              '(ie. positive integers) or groups of atom numbers ' \
467
              'grouped by parentheses-like enclosers. \n' \
468
              'eg. 128,(135 138;141) 87 {45, 68}'
469
    # Convert the string into a list of lists
470
    surf_ctrs2 = try_command(str2lst,
471
                             [(ValueError, err_msg), (AttributeError, err_msg)],
472
                             dos_inp.get('Screening', 'surf_ctrs2'))
473
    # Check all elements of the list (of lists) are positive integers
474
    for ctr in surf_ctrs2:
475
        if type(ctr) is list:
476
            for atom in ctr:
477
                if atom < 0:
478
                    logger.error(err_msg)
479
                    raise ValueError(err_msg)
480
        elif type(ctr) is int:
481
            if ctr < 0:
482
                logger.error(err_msg)
483
                raise ValueError(err_msg)
484
        else:
485
            logger.error(err_msg)
486
            raise ValueError(err_msg)
487

    
488
    return surf_ctrs2
489

    
490

    
491
def get_molec_ctrs():
492
    err_msg = 'The value of molec_ctrs should be a list of atom' \
493
              ' numbers (ie. positive integers) or groups of atom ' \
494
              'numbers enclosed by parentheses-like characters. \n' \
495
              'eg. 128,(135 138;141) 87 {45, 68}'
496
    # Convert the string into a list of lists
497
    molec_ctrs = try_command(str2lst,
498
                             [(ValueError, err_msg),
499
                              (AttributeError, err_msg)],
500
                             dos_inp.get('Screening', 'molec_ctrs'))
501
    # Check all elements of the list (of lists) are positive integers
502
    for ctr in molec_ctrs:
503
        if isinstance(ctr, list):
504
            for atom in ctr:
505
                if atom < 0:
506
                    logger.error(err_msg)
507
                    raise ValueError(err_msg)
508
        elif isinstance(ctr, int):
509
            if ctr < 0:
510
                logger.error(err_msg)
511
                raise ValueError(err_msg)
512
        else:
513
            logger.error(err_msg)
514
            raise ValueError(err_msg)
515

    
516
    return molec_ctrs
517

    
518

    
519
def get_molec_ctrs2():
520
    err_msg = 'The value of molec_ctrs2 should be a list of atom ' \
521
              'numbers (ie. positive integers) or groups of atom ' \
522
              'numbers enclosed by parentheses-like characters. \n' \
523
              'eg. 128,(135 138;141) 87 {45, 68}'
524
    # Convert the string into a list of lists
525
    molec_ctrs2 = try_command(str2lst, [(ValueError, err_msg),
526
                                        (AttributeError, err_msg)],
527
                              dos_inp.get('Screening', 'molec_ctrs2'))
528

    
529
    # Check all elements of the list (of lists) are positive integers
530
    for ctr in molec_ctrs2:
531
        if isinstance(ctr, list):
532
            for atom in ctr:
533
                if atom < 0:
534
                    logger.error(err_msg)
535
                    raise ValueError(err_msg)
536
        elif isinstance(ctr, int):
537
            if ctr < 0:
538
                logger.error(err_msg)
539
                raise ValueError(err_msg)
540
        else:
541
            logger.error(err_msg)
542
            raise ValueError(err_msg)
543

    
544
    return molec_ctrs2
545

    
546

    
547
def get_molec_ctrs3():
548
    err_msg = 'The value of molec_ctrs3 should be a list of atom ' \
549
              'numbers (ie. positive integers) or groups of atom ' \
550
              'numbers enclosed by parentheses-like characters. \n' \
551
              'eg. 128,(135 138;141) 87 {45, 68}'
552
    # Convert the string into a list of lists
553
    molec_ctrs3 = try_command(str2lst, [(ValueError, err_msg),
554
                                        (AttributeError, err_msg)],
555
                              dos_inp.get('Screening', 'molec_ctrs3'))
556

    
557
    # Check all elements of the list (of lists) are positive integers
558
    for ctr in molec_ctrs3:
559
        if isinstance(ctr, list):
560
            for atom in ctr:
561
                if atom < 0:
562
                    logger.error(err_msg)
563
                    raise ValueError(err_msg)
564
        elif isinstance(ctr, int):
565
            if ctr < 0:
566
                logger.error(err_msg)
567
                raise ValueError(err_msg)
568
        else:
569
            logger.error(err_msg)
570
            raise ValueError(err_msg)
571

    
572
    return molec_ctrs3
573

    
574

    
575
def get_max_helic_angle():
576
    err_msg = "'max_helic_angle' must be a positive number in degrees"
577
    max_helic_angle = try_command(dos_inp.getfloat, [(ValueError, err_msg)],
578
                                  'Screening', 'max_helic_angle',
579
                                  fallback=180.0)
580
    if max_helic_angle < 0:
581
        logger.error(err_msg)
582
        raise ValueError(err_msg)
583

    
584
    return max_helic_angle
585

    
586

    
587
def get_select_magns():
588
    select_magns_vals = ['energy', 'moi']
589
    select_magns_str = dos_inp.get('Screening', 'select_magns',
590
                                   fallback='moi')
591
    select_magns_str.replace(',', ' ').replace(';', ' ')
592
    select_magns = select_magns_str.split(' ')
593
    select_magns = [m.lower() for m in select_magns]
594
    for m in select_magns:
595
        check_expect_val(m, select_magns_vals)
596
    return select_magns
597

    
598

    
599
def get_confs_per_magn():
600
    err_msg = num_error % ('confs_per_magn', 'positive integer')
601
    confs_per_magn = try_command(dos_inp.getint, [(ValueError, err_msg)],
602
                                 'Screening', 'confs_per_magn', fallback=2)
603
    if confs_per_magn <= 0:
604
        logger.error(err_msg)
605
        raise ValueError(err_msg)
606
    return confs_per_magn
607

    
608

    
609
def get_surf_norm_vect():
610
    err = "'surf_norm_vect' must be a 3 component vector, 'x', 'y' or 'z', " \
611
          "'auto' or 'asann'."
612
    cart_axes = {'x': [1.0, 0.0, 0.0], '-x': [-1.0, 0.0, 0.0],
613
                 'y': [0.0, 1.0, 0.0], '-y': [0.0, -1.0, 0.0],
614
                 'z': [0.0, 0.0, 1.0], '-z': [0.0, 0.0, -1.0]}
615
    surf_norm_vect_str = dos_inp.get('Screening', 'surf_norm_vect',
616
                                     fallback="auto").lower()
617
    if surf_norm_vect_str == "asann" or surf_norm_vect_str == "auto":
618
        return 'auto'
619
    if surf_norm_vect_str in cart_axes:
620
        return np.array(cart_axes[surf_norm_vect_str])
621
    surf_norm_vect = try_command(str2lst, [(ValueError, err)],
622
                                 surf_norm_vect_str, float)
623
    if len(surf_norm_vect) != 3:
624
        logger.error(err)
625
        raise ValueError(err)
626

    
627
    return np.array(surf_norm_vect) / np.linalg.norm(surf_norm_vect)
628

    
629

    
630
def get_adsorption_height():
631
    err_msg = num_error % ('adsorption_height', 'positive number')
632
    ads_height = try_command(dos_inp.getfloat, [(ValueError, err_msg)],
633
                             'Screening', 'adsorption_height', fallback=2.5)
634
    if ads_height <= 0:
635
        logger.error(err_msg)
636
        raise ValueError(err_msg)
637
    return ads_height
638

    
639

    
640
def get_set_angles():
641
    set_vals = ['euler', 'internal']
642
    check_expect_val(dos_inp.get('Screening', 'set_angles').lower(), set_vals)
643
    set_angles = dos_inp.get('Screening', 'set_angles',
644
                             fallback='euler').lower()
645
    return set_angles
646

    
647

    
648
def get_pts_per_angle():
649
    err_msg = num_error % ('sample_points_per_angle', 'positive integer')
650
    pts_per_angle = try_command(dos_inp.getint,
651
                                [(ValueError, err_msg)],
652
                                'Screening', 'sample_points_per_angle',
653
                                fallback=3)
654
    if pts_per_angle <= 0:
655
        logger.error(err_msg)
656
        raise ValueError(err_msg)
657
    return pts_per_angle
658

    
659

    
660
def get_max_structures():
661
    err_msg = num_error % ('max_structures', 'positive integer')
662
    max_structs = dos_inp.get('Screening', 'max_structures', fallback="False")
663
    if max_structs.lower() in turn_false_answers:
664
        return np.inf
665
    if try_command(int, [(ValueError, err_msg)], max_structs) <= 0:
666
        logger.error(err_msg)
667
        raise ValueError(err_msg)
668
    return int(max_structs)
669

    
670

    
671
def get_coll_thrsld():
672
    err_msg = num_error % ('collision_threshold', 'positive number')
673
    coll_thrsld_str = dos_inp.get('Screening', 'collision_threshold',
674
                                  fallback="False")
675
    if coll_thrsld_str.lower() in turn_false_answers:
676
        return False
677
    coll_thrsld = try_command(float, [(ValueError, err_msg)], coll_thrsld_str)
678

    
679
    if coll_thrsld <= 0:
680
        logger.error(err_msg)
681
        raise ValueError(err_msg)
682

    
683
    return coll_thrsld
684

    
685

    
686
def get_min_coll_height(norm_vect):
687
    err_msg = num_error % ('min_coll_height', 'decimal number')
688
    min_coll_height = dos_inp.get('Screening', 'min_coll_height',
689
                                  fallback="False")
690
    if min_coll_height.lower() in turn_false_answers:
691
        return False
692
    min_coll_height = try_command(float, [(ValueError, err_msg)],
693
                                  min_coll_height)
694
    cart_axes = [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0],
695
                 [-1.0, 0.0, 0.0], [0.0, -1.0, 0.0], [0.0, 0.0, -1.0]]
696
    err_msg = "'min_coll_height' option is only implemented for " \
697
              "'surf_norm_vect' to be one of the x, y or z axes. "
698
    if not isinstance(norm_vect, str) or norm_vect != 'auto':
699
        check_expect_val(norm_vect.tolist(), cart_axes, err_msg)
700
    return min_coll_height
701

    
702

    
703
def get_exclude_ads_ctr():
704
    err_msg = "exclude_ads_ctr must have a boolean value."
705
    exclude_ads_ctr = try_command(dos_inp.getboolean, [(ValueError, err_msg)],
706
                                  "Screening", "exclude_ads_ctr",
707
                                  fallback=False)
708
    return exclude_ads_ctr
709

    
710

    
711
def get_H_donor(spec_atoms):
712
    from ase.data import chemical_symbols
713
    err_msg = "The value of 'h_donor' must be either False, a chemical symbol "\
714
              "or an atom index"
715
    h_donor_str = dos_inp.get('Screening', 'h_donor', fallback="False")
716
    h_donor = []
717
    if h_donor_str.lower() in turn_false_answers:
718
        return False
719
    err = False
720
    for el in h_donor_str.split():
721
        try:
722
            h_donor.append(int(el))
723
        except ValueError:
724
            if el not in chemical_symbols + [nw_sym for pairs in spec_atoms
725
                                             for nw_sym in pairs]:
726
                err = True
727
            else:
728
                h_donor.append(el)
729
        finally:
730
            if err:
731
                logger.error(err_msg)
732
                ValueError(err_msg)
733
    return h_donor
734

    
735

    
736
def get_H_acceptor(spec_atoms):
737
    from ase.data import chemical_symbols
738
    err_msg = "The value of 'h_acceptor' must be either 'all', a chemical " \
739
              "symbol or an atom index."
740
    h_acceptor_str = dos_inp.get('Screening', 'h_acceptor', fallback="all")
741
    if h_acceptor_str.lower() == "all":
742
        return "all"
743
    h_acceptor = []
744
    err = False
745
    for el in h_acceptor_str.split():
746
        try:
747
            h_acceptor.append(int(el))
748
        except ValueError:
749
            if el not in chemical_symbols + [nw_sym for pairs in spec_atoms
750
                                             for nw_sym in pairs]:
751
                err = True
752
            else:
753
                h_acceptor.append(el)
754
        finally:
755
            if err:
756
                logger.error(err_msg)
757
                raise ValueError(err_msg)
758
    return h_acceptor
759

    
760

    
761
def get_use_molec_file():
762
    use_molec_file = dos_inp.get('Screening', 'use_molec_file',
763
                                 fallback='False')
764
    if use_molec_file.lower() in turn_false_answers:
765
        return False
766
    if not os.path.isfile(use_molec_file):
767
        logger.error(f'File {use_molec_file} not found.')
768
        raise FileNotFoundError(f'File {use_molec_file} not found')
769

    
770
    return use_molec_file
771

    
772

    
773
# Refinement
774

    
775
def get_refine_inp_file(code):
776
    inp_file_lst = dos_inp.get('Refinement', 'refine_inp_file').split()
777
    check_inp_file(inp_file_lst[0] if len(inp_file_lst) == 1 else inp_file_lst,
778
                   code)
779
    return inp_file_lst[0] if len(inp_file_lst) == 1 else inp_file_lst
780

    
781

    
782
def get_energy_cutoff():
783
    err_msg = num_error % ('energy_cutoff', 'positive decimal number')
784
    energy_cutoff = try_command(dos_inp.getfloat,
785
                                [(ValueError, err_msg)],
786
                                'Refinement', 'energy_cutoff', fallback=0.5)
787
    if energy_cutoff < 0:
788
        logger.error(err_msg)
789
        raise ValueError(err_msg)
790
    return energy_cutoff
791

    
792

    
793
# Read input parameters
794

    
795
def read_input(in_file):
796
    from modules.formats import adapt_format
797

    
798
    err_msg = False
799
    try:
800
        dos_inp.read(in_file)
801
    except MissingSectionHeaderError as e:
802
        logger.error('There are options in the input file without a Section '
803
                     'header.')
804
        err_msg = e
805
    except DuplicateOptionError as e:
806
        logger.error('There is an option in the input file that has been '
807
                     'specified more than once.')
808
        err_msg = e
809
    except Exception as e:
810
        err_msg = e
811
    else:
812
        err_msg = False
813
    finally:
814
        if isinstance(err_msg, BaseException):
815
            raise err_msg
816

    
817
    inp_vars = {}
818

    
819
    # Global
820
    if not dos_inp.has_section('Global'):
821
        logger.error(no_sect_err % 'Global')
822
        raise NoSectionError('Global')
823

    
824
    # Mandatory options
825
    # Checks whether the mandatory options 'run_type', 'code', etc. are present.
826
    glob_mand_opts = ['run_type', 'code', 'batch_q_sys']
827
    for opt in glob_mand_opts:
828
        if not dos_inp.has_option('Global', opt):
829
            logger.error(no_opt_err % (opt, 'Global'))
830
            raise NoOptionError(opt, 'Global')
831

    
832
    # Gets which sections are to be carried out
833
    isolated, screening, refinement = get_run_type()
834
    inp_vars['isolated'] = isolated
835
    inp_vars['screening'] = screening
836
    inp_vars['refinement'] = refinement
837
    inp_vars['code'] = get_code()
838
    inp_vars['batch_q_sys'] = get_batch_q_sys()
839

    
840
    # Dependent options:
841
    if inp_vars['batch_q_sys']:
842
        inp_vars['max_jobs'] = get_max_jobs()
843
        if inp_vars['batch_q_sys'] != 'local':
844
            if not dos_inp.has_option('Global', 'subm_script'):
845
                logger.error(no_opt_err % ('subm_script', 'Global'))
846
                raise NoOptionError('subm_script', 'Global')
847
            inp_vars['subm_script'] = get_subm_script()
848

    
849
    # Facultative options (Default/Fallback value present)
850
    inp_vars['pbc_cell'] = get_pbc_cell()
851
    inp_vars['project_name'] = get_project_name()
852
    # inp_vars['relaunch_err'] = get_relaunch_err()
853
    inp_vars['special_atoms'] = get_special_atoms()
854

    
855
    # Isolated
856
    if isolated:
857
        if not dos_inp.has_section('Isolated'):
858
            logger.error(no_sect_err % 'Isolated')
859
            raise NoSectionError('Isolated')
860
        # Mandatory options
861
        # Checks whether the mandatory options are present.
862
        iso_mand_opts = ['isol_inp_file', 'molec_file']
863
        for opt in iso_mand_opts:
864
            if not dos_inp.has_option('Isolated', opt):
865
                logger.error(no_opt_err % (opt, 'Isolated'))
866
                raise NoOptionError(opt, 'Isolated')
867
        inp_vars['isol_inp_file'] = get_isol_inp_file(inp_vars['code'])
868
        inp_vars['molec_file'] = get_molec_file()
869

    
870
        # Checks for PBC
871
        atms = adapt_format('ase', inp_vars['molec_file'],
872
                            inp_vars['special_atoms'])
873
        if inp_vars['code'] == 'vasp' and np.linalg.det(atms.cell) == 0.0 \
874
                and inp_vars['pbc_cell'] is False:
875
            err_msg = "When running calculations with 'VASP', the PBC cell " \
876
                      "should be provided either implicitely inside " \
877
                      "'molec_file' or by setting the 'pbc_cell' option."
878
            logger.error(err_msg)
879
            raise ValueError(err_msg)
880
        elif inp_vars['pbc_cell'] is False and np.linalg.det(atms.cell) != 0.0:
881
            inp_vars['pbc_cell'] = atms.cell
882
            logger.info(f"Obtained pbc_cell from '{inp_vars['molec_file']}' "
883
                        f"file.")
884
        elif (atms.cell != 0).any() and not np.allclose(inp_vars['pbc_cell'],
885
                                                        atms.cell):
886
            logger.warning("'molec_file' has an implicit cell defined "
887
                           f"different than 'pbc_cell'.\n"
888
                           f"'molec_file' = {atms.cell}.\n"
889
                           f"'pbc_cell' = {inp_vars['pbc_cell']}).\n"
890
                           "'pbc_cell' value will be used.")
891

    
892
        # Facultative options (Default/Fallback value present)
893
        inp_vars['num_conformers'] = get_num_conformers()
894
        inp_vars['pre_opt'] = get_pre_opt()
895

    
896
    # Screening
897
    if screening:
898
        if not dos_inp.has_section('Screening'):
899
            logger.error(no_sect_err % 'Screening')
900
            raise NoSectionError('Screening')
901
        # Mandatory options:
902
        # Checks whether the mandatory options are present.
903
        screen_mand_opts = ['screen_inp_file', 'surf_file', 'sites',
904
                            'molec_ctrs']
905
        for opt in screen_mand_opts:
906
            if not dos_inp.has_option('Screening', opt):
907
                logger.error(no_opt_err % (opt, 'Screening'))
908
                raise NoOptionError(opt, 'Screening')
909
        inp_vars['screen_inp_file'] = get_screen_inp_file(inp_vars['code'])
910
        inp_vars['surf_file'] = get_surf_file()
911
        inp_vars['sites'] = get_sites()
912
        inp_vars['molec_ctrs'] = get_molec_ctrs()
913

    
914
        # Checks for PBC
915
        # Checks for PBC
916
        atms = adapt_format('ase', inp_vars['surf_file'],
917
                            inp_vars['special_atoms'])
918
        if inp_vars['code'] == 'vasp' and np.linalg.det(atms.cell) == 0.0 \
919
                and inp_vars['pbc_cell'] is False:
920
            err_msg = "When running calculations with 'VASP', the PBC cell " \
921
                      "should be provided either implicitely inside " \
922
                      "'surf_file' or by setting the 'pbc_cell' option."
923
            logger.error(err_msg)
924
            raise ValueError(err_msg)
925
        elif inp_vars['pbc_cell'] is False and np.linalg.det(atms.cell) != 0.0:
926
            inp_vars['pbc_cell'] = atms.cell
927
            logger.info(f"Obtained pbc_cell from '{inp_vars['surf_file']}' "
928
                        f"file.")
929
        elif (atms.cell != 0).any() and not np.allclose(inp_vars['pbc_cell'],
930
                                                        atms.cell):
931
            logger.warning("'surf_file' has an implicit cell defined "
932
                           f"different than 'pbc_cell'.\n"
933
                           f"'surf_file' = {atms.cell}.\n"
934
                           f"'pbc_cell' = {inp_vars['pbc_cell']}).\n"
935
                           "'pbc_cell' value will be used.")
936

    
937
        # Facultative options (Default value present)
938
        inp_vars['select_magns'] = get_select_magns()
939
        inp_vars['confs_per_magn'] = get_confs_per_magn()
940
        inp_vars['surf_norm_vect'] = get_surf_norm_vect()
941
        inp_vars['adsorption_height'] = get_adsorption_height()
942
        inp_vars['set_angles'] = get_set_angles()
943
        inp_vars['sample_points_per_angle'] = get_pts_per_angle()
944
        inp_vars['collision_threshold'] = get_coll_thrsld()
945
        inp_vars['min_coll_height'] = get_min_coll_height(
946
            inp_vars['surf_norm_vect'])
947
        if inp_vars['min_coll_height'] is False \
948
                and inp_vars['collision_threshold'] is False:
949
            logger.warning("Collisions are deactivated: Overlapping of "
950
                           "adsorbate and surface is possible")
951
        inp_vars['exclude_ads_ctr'] = get_exclude_ads_ctr()
952
        inp_vars['h_donor'] = get_H_donor(inp_vars['special_atoms'])
953
        inp_vars['max_structures'] = get_max_structures()
954
        inp_vars['use_molec_file'] = get_use_molec_file()
955

    
956
        # Options depending on the value of others
957
        if inp_vars['set_angles'] == "internal":
958
            internal_opts = ['molec_ctrs2', 'molec_ctrs3', 'surf_ctrs2',
959
                             'max_helic_angle']
960
            for opt in internal_opts:
961
                if not dos_inp.has_option('Screening', opt):
962
                    logger.error(no_opt_err % (opt, 'Screening'))
963
                    raise NoOptionError(opt, 'Screening')
964
            inp_vars['max_helic_angle'] = get_max_helic_angle()
965
            inp_vars['molec_ctrs2'] = get_molec_ctrs2()
966
            inp_vars['molec_ctrs3'] = get_molec_ctrs3()
967
            inp_vars['surf_ctrs2'] = get_surf_ctrs2()
968
            if len(inp_vars["molec_ctrs2"]) != len(inp_vars['molec_ctrs']) \
969
                    or len(inp_vars["molec_ctrs3"]) != \
970
                    len(inp_vars['molec_ctrs']) \
971
                    or len(inp_vars['surf_ctrs2']) != len(inp_vars['sites']):
972
                err_msg = "'molec_ctrs' 'molec_ctrs2' and 'molec_ctrs3' must " \
973
                          "have the same number of indices "
974
                logger.error(err_msg)
975
                raise ValueError(err_msg)
976

    
977
        if inp_vars['h_donor'] is False:
978
            inp_vars['h_acceptor'] = False
979
        else:
980
            inp_vars['h_acceptor'] = get_H_acceptor(inp_vars['special_atoms'])
981

    
982
    # Refinement
983
    if refinement:
984
        if not dos_inp.has_section('Refinement'):
985
            logger.error(no_sect_err % 'Refinement')
986
            raise NoSectionError('Refinement')
987
        # Mandatory options
988
        # Checks whether the mandatory options are present.
989
        ref_mand_opts = ['refine_inp_file']
990
        for opt in ref_mand_opts:
991
            if not dos_inp.has_option('Refinement', opt):
992
                logger.error(no_opt_err % (opt, 'Refinement'))
993
                raise NoOptionError(opt, 'Refinement')
994
        inp_vars['refine_inp_file'] = get_refine_inp_file(inp_vars['code'])
995

    
996
        # Facultative options (Default value present)
997
        inp_vars['energy_cutoff'] = get_energy_cutoff()
998
        # end energy_cutoff
999

    
1000
    return_vars_str = "\n\t".join([str(key) + ": " + str(value)
1001
                                   for key, value in inp_vars.items()])
1002
    logger.info(f'Correctly read {in_file} parameters:'
1003
                f' \n\n\t{return_vars_str}\n')
1004

    
1005
    return inp_vars
1006

    
1007

    
1008
if __name__ == "__main__":
1009
    import sys
1010

    
1011
    print(read_input(sys.argv[1]))