Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / dos_input.py @ 58ede1f9

Historique | Voir | Annoter | Télécharger (40,81 ko)

1
"""Functions to deal with DockOnSurf input files.
2

3
Functions
4
try_command:Tries to run a command and logs its exceptions (expected and not).
5
str2lst: Converts a string of integers, and groups of them, to a list of lists.
6
check_expect_val: Checks whether the value of an option has an adequate value.
7
read_input: Sets up the calculation by reading the parameters from input file.
8
get_run_type: Gets 'run_type' value and checks that its value is acceptable.
9
get_code: Gets 'code' value and checks that its value is acceptable.
10
get_batch_q_sys: Gets 'batch_q_sys' value and checks that its value is
11
acceptable.
12
get_relaunch_err: Gets 'relaunch_err' value and checks that its value is
13
acceptable.
14
get_max_jobs: Gets 'max_jobs' value and checks that its value is acceptable.
15
get_special_atoms: Gets 'special_atoms' value and checks that its value is
16
acceptable.
17
get_isol_inp_file: Gets 'isol_inp_file' value and checks that its value is
18
acceptable.
19
get_cluster_magns: Gets 'cluster_magns' value and checks that its value is
20
acceptable.
21
get_num_conformers: Gets 'num_conformers' value and checks that its value is
22
acceptable.
23
get_num_prom_cand: Gets 'num_prom_cand' value and checks that its value is
24
acceptable.
25
get_iso_rmsd: Gets 'iso_rmsd' value and checks that its value is acceptable.
26
get_pre_opt: Gets 'pre_opt' value and checks that its value is acceptable.
27
get_screen_inp_file: Gets 'screen_inp_file' value and checks that its value is
28
acceptable.
29
get_sites: Gets 'sites' value and checks that its value is acceptable.
30
get_molec_ctrs: Gets 'molec_ctrs' value and checks that its value is
31
acceptable.
32
get_try_disso: Gets 'try_disso' value and checks that its value is acceptable.
33
get_pts_per_angle: Gets 'pts_per_angle' value and checks that its value is
34
acceptable.
35
get_coll_thrsld: Gets 'coll_thrsld' value and checks that its value is
36
acceptable.
37
get_screen_rmsd: Gets 'screen_rmsd' value and checks that its value is
38
acceptable.
39
get_coll_bottom_z: Gets 'coll_bottom_z' value and checks that its value is
40
acceptable.
41
get_refine_inp_file: Gets 'refine_inp_file' value and checks that its value is
42
acceptable.
43
get_energy_cutoff: Gets 'energy_cutoff' value and checks that its value is
44
acceptable.
45
"""
46
import os.path
47
import logging
48
from configparser import ConfigParser, NoSectionError, NoOptionError, \
49
    MissingSectionHeaderError, DuplicateOptionError
50
import numpy as np
51
from modules.utilities import try_command
52

    
53
logger = logging.getLogger('DockOnSurf')
54

    
55
dos_inp = ConfigParser(inline_comment_prefixes='#',
56
                       empty_lines_in_values=False)
57

    
58
new_answers = {'n': False, 'none': False, 'nay': False,
59
               'y': True, '': True, 'aye': True, 'sure': True}
60
for answer, val in new_answers.items():
61
    dos_inp.BOOLEAN_STATES[answer] = val  # TODO Check value 0
62
turn_false_answers = [answer for answer in dos_inp.BOOLEAN_STATES
63
                      if dos_inp.BOOLEAN_STATES[answer] is False]
64
turn_true_answers = [answer for answer in dos_inp.BOOLEAN_STATES
65
                     if dos_inp.BOOLEAN_STATES[answer]]
66

    
67
no_sect_err = "Section '%s' not found on input file"
68
no_opt_err = "Option '%s' not found on section '%s'"
69
num_error = "'%s' value must be a %s"
70

    
71

    
72
# Auxilary functions
73

    
74
def str2lst(cmplx_str, func=int):  # TODO: enable deeper level of nested lists
75
    # TODO Treat all-enclosing parenthesis as a list instead of list of lists.
76
    """Converts a string of integers, and groups of them, to a list.
77

78
    Keyword arguments:
79
    @param cmplx_str: str, string of integers (or floats) and groups of them
80
    enclosed by parentheses-like characters.
81
    - Group enclosers: '()' '[]' and '{}'.
82
    - Separators: ',' ';' and ' '.
83
    - Nested groups are not allowed: '3 ((6 7) 8) 4'.
84
    @param func: either to use int or float
85

86
    @return list, list of integers (or floats), or list of integers (or floats)
87
    in the case they were grouped. First, the singlets are placed, and then the
88
    groups in input order.
89

90
    eg. '128,(135 138;141] 87 {45, 68}' -> [128, 87, [135, 138, 141], [45, 68]]
91
    """
92

    
93
    # Checks
94
    error_msg = "Function argument should be a str, sequence of " \
95
                "numbers separated by ',' ';' or ' '." \
96
                "\nThey can be grouped in parentheses-like enclosers: '()', " \
97
                "'[]' or {}. Nested groups are not allowed. \n" \
98
                "eg. 128,(135 138;141) 87 {45, 68}"
99
    cmplx_str = try_command(cmplx_str.replace, [(AttributeError, error_msg)],
100
                            ',', ' ')
101

    
102
    cmplx_str = cmplx_str.replace(';', ' ').replace('[', '(').replace(
103
        ']', ')').replace('{', '(').replace('}', ')')
104

    
105
    try_command(list, [(ValueError, error_msg)], map(func, cmplx_str.replace(
106
        ')', '').replace('(', '').split()))
107

    
108
    deepness = 0
109
    for el in cmplx_str.split():
110
        if '(' in el:
111
            deepness += 1
112
        if ')' in el:
113
            deepness += -1
114
        if deepness > 1 or deepness < 0:
115
            logger.error(error_msg)
116
            raise ValueError(error_msg)
117

    
118
    init_list = cmplx_str.split()
119
    start_group = []
120
    end_group = []
121
    for i, element in enumerate(init_list):
122
        if '(' in element:
123
            start_group.append(i)
124
            init_list[i] = element.replace('(', '')
125
        if ')' in element:
126
            end_group.append(i)
127
            init_list[i] = element.replace(')', '')
128

    
129
    init_list = list(map(func, init_list))
130

    
131
    new_list = []
132
    for start_el, end_el in zip(start_group, end_group):
133
        new_list.append(init_list[start_el:end_el + 1])
134

    
135
    for v in new_list:
136
        for el in v:
137
            init_list.remove(el)
138
    return init_list + new_list
139

    
140

    
141
def check_expect_val(value, expect_vals, err_msg=None):
142
    """Checks whether an option lies within its expected values.
143

144
    Keyword arguments:
145
    @param value: The variable to check if its value lies within the expected
146
    ones
147
    @param expect_vals: list, list of values allowed for the present option.
148
    @param err_msg: The error message to be prompted in both log and screen.
149
    @raise ValueError: if the value is not among the expected ones.
150
    @return True if the value is among the expected ones.
151
    """
152
    if err_msg is None:
153
        err_msg = f"'{value}' is not an adequate value.\n" \
154
                  f"Adequate values: {expect_vals}"
155
    if not any([exp_val == value for exp_val in expect_vals]):
156
        logger.error(err_msg)
157
        raise ValueError(err_msg)
158

    
159
    return True
160

    
161

    
162
def check_inp_files(inp_files, code, potcar_dir):
163
    if code == 'cp2k':
164
        from pycp2k import CP2K
165
        if not isinstance(inp_files, str):
166
            err_msg = "When using CP2K, only one input file is allowed"
167
            logger.error(err_msg)
168
            ValueError(err_msg)
169
        elif not os.path.isfile(inp_files):
170
            err_msg = f"Input file {inp_files} was not found."
171
            logger.error(err_msg)
172
            raise FileNotFoundError(err_msg)
173
        cp2k = CP2K()
174
        try_command(cp2k.parse,
175
                    [(UnboundLocalError, "Invalid CP2K input file")], inp_files)
176
    elif code == "vasp":
177
        if not potcar_dir:
178
            mand_files = ["INCAR", "KPOINTS", "POTCAR"]
179
        else:
180
            mand_files = ["INCAR", "KPOINTS"]
181
            if any("POTCAR" in inp_file for inp_file in inp_files):
182
                logger.warning("A POTCAR file was specified as input file "
183
                               "while the automatic generation of POTCAR was "
184
                               "also enabled via the 'potcar_dir' keyword. The "
185
                               "POTCAR specified as input_file will be used "
186
                               "instead of the auto-generated one.")
187
        # Check that it inp_files is a list of file paths
188
        if not isinstance(inp_files, list) and all(isinstance(inp_file, str)
189
                                                   for inp_file in inp_files):
190
            err_msg = "'inp_files' should be a list of file names/paths"
191
            logger.error(err_msg)
192
            raise ValueError(err_msg)
193
        # Check that all mandatory files are defined once and just once.
194
        elif [[mand_file in inp_file for inp_file in inp_files].count(True)
195
              for mand_file in mand_files].count(1) != len(mand_files):
196
            err_msg = f"Each of the mandatory files {mand_files} must be " \
197
                      f"defined once and just once."
198
            logger.error(err_msg)
199
            raise FileNotFoundError(err_msg)
200
        # Check that the defined files exist
201
        elif any(not os.path.isfile(inp_file) for inp_file in inp_files):
202
            err_msg = f"At least one of the mandatory files {mand_files} was " \
203
                      "not found."
204
            logger.error(err_msg)
205
            raise FileNotFoundError(err_msg)
206
        # Check that mandatory files are actual vasp files.
207
        else:
208
            from pymatgen.io.vasp.inputs import Incar, Kpoints, Potcar
209
            for inp_file in inp_files:
210
                file_name = inp_file.split("/")[-1]
211
                if not any(mand_file in file_name for mand_file in mand_files):
212
                    continue
213
                file_type = ""
214
                for mand_file in mand_files:
215
                    if mand_file in inp_file:
216
                        file_type = mand_file
217
                err = False
218
                err_msg = f"'{inp_file}' is not a valid {file_name} file."
219
                try:
220
                    eval(file_type.capitalize()).from_file(inp_file)
221
                except ValueError:
222
                    logger.error(err_msg)
223
                    err = ValueError(err_msg)
224
                except IndexError:
225
                    logger.error(err_msg)
226
                    err = IndexError(err_msg)
227
                else:
228
                    if file_name == "INCAR":
229
                        Incar.from_file("INCAR").check_params()
230
                finally:
231
                    if isinstance(err, BaseException):
232
                        raise err
233

    
234

    
235
# Global
236

    
237
def get_run_type():
238
    isolated, screening, refinement = (False, False, False)
239
    run_type_vals = ['isolated', 'screening', 'refinement', 'adsorption',
240
                     'full']
241
    run_types = dos_inp.get('Global', 'run_type').split()
242
    for run_type in run_types:
243
        check_expect_val(run_type.lower(), run_type_vals)
244
        if 'isol' in run_type.lower():
245
            isolated = True
246
        if 'screen' in run_type.lower():
247
            screening = True
248
        if 'refine' in run_type.lower():
249
            refinement = True
250
        if 'adsor' in run_type.lower():
251
            screening, refinement = (True, True)
252
        if 'full' in run_type.lower():
253
            isolated, screening, refinement = (True, True, True)
254

    
255
    return isolated, screening, refinement
256

    
257

    
258
def get_code():
259
    code_vals = ['cp2k', 'vasp']
260
    check_expect_val(dos_inp.get('Global', 'code').lower(), code_vals)
261
    code = dos_inp.get('Global', 'code').lower()
262
    return code
263

    
264

    
265
def get_batch_q_sys():
266
    batch_q_sys_vals = ['sge', 'lsf', 'irene', 'local'] + turn_false_answers
267
    check_expect_val(dos_inp.get('Global', 'batch_q_sys').lower(),
268
                     batch_q_sys_vals)
269
    batch_q_sys = dos_inp.get('Global', 'batch_q_sys').lower()
270
    if batch_q_sys.lower() in turn_false_answers:
271
        return False
272
    else:
273
        return batch_q_sys
274

    
275

    
276
def get_pbc_cell():
277
    from ase.atoms import Cell
278
    err_msg = "'pbc_cell' must be either 3 vectors of size 3 or False."
279
    pbc_cell_str = dos_inp.get('Global', 'pbc_cell', fallback="False")
280
    if pbc_cell_str.lower() in turn_false_answers:
281
        return False
282
    else:
283
        pbc_cell = np.array(try_command(str2lst, [(ValueError, err_msg)],
284
                                        pbc_cell_str, float))
285
        if pbc_cell.shape != (3, 3):
286
            logger.error(err_msg)
287
            raise ValueError(err_msg)
288
        if np.linalg.det(pbc_cell) == 0.0:
289
            err_msg = "The volume of the defined cell is 0"
290
            logger.error(err_msg)
291
            raise ValueError(err_msg)
292
        return Cell(pbc_cell)
293

    
294

    
295
def get_subm_script():
296
    subm_script = dos_inp.get('Global', 'subm_script')
297
    if not os.path.isfile(subm_script):
298
        logger.error(f'File {subm_script} not found.')
299
        raise FileNotFoundError(f'File {subm_script} not found')
300
    return subm_script
301

    
302

    
303
def get_project_name():
304
    project_name = dos_inp.get('Global', 'project_name', fallback='')
305
    return project_name
306

    
307

    
308
def get_relaunch_err():
309
    relaunch_err_vals = ['geo_not_conv']
310
    relaunch_err = dos_inp.get('Global', 'relaunch_err',
311
                               fallback="False")
312
    if relaunch_err.lower() in turn_false_answers:
313
        return False
314
    else:
315
        check_expect_val(relaunch_err.lower(), relaunch_err_vals)
316
    return relaunch_err
317

    
318

    
319
def get_max_jobs():
320
    import re
321
    err_msg = "'max_jobs' must be a list of, number plus 'p', 'q' or 'r', or " \
322
              "a combination of them without repeating letters.\n" \
323
              "eg: '2r 3p 4pr', '5q' or '3r 3p'"
324
    max_jobs_str = dos_inp.get('Global', 'max_jobs', fallback="inf").lower()
325
    str_vals = ["r", "p", "q", "rp", "rq", "pr", "qr"]
326
    max_jobs = {"r": np.inf, "p": np.inf, "rp": np.inf}
327
    if "inf" == max_jobs_str:
328
        return {"r": np.inf, "p": np.inf, "rp": np.inf}
329
    # Iterate over the number of requirements:
330
    for req in max_jobs_str.split():
331
        # Split numbers from letters into a list
332
        req_parts = re.findall(r'[a-z]+|\d+', req)
333
        if len(req_parts) != 2:
334
            logger.error(err_msg)
335
            raise ValueError(err_msg)
336
        if req_parts[0].isdecimal():
337
            req_parts[1] = req_parts[1].replace('q', 'p').replace('pr', 'rp')
338
            if req_parts[1] in str_vals and max_jobs[req_parts[1]] == np.inf:
339
                max_jobs[req_parts[1]] = int(req_parts[0])
340
        elif req_parts[1].isdecimal():
341
            req_parts[0] = req_parts[0].replace('q', 'p').replace('pr', 'rp')
342
            if req_parts[0] in str_vals and max_jobs[req_parts[0]] == np.inf:
343
                max_jobs[req_parts[0]] = int(req_parts[1])
344
        else:
345
            logger.error(err_msg)
346
            raise ValueError(err_msg)
347

    
348
    return max_jobs
349

    
350

    
351
def get_special_atoms():
352
    from ase.data import chemical_symbols
353

    
354
    spec_at_err = '\'special_atoms\' does not have an adequate format.\n' \
355
                  'Adequate format: (Fe1 Fe) (O1 O)'
356
    special_atoms = dos_inp.get('Global', 'special_atoms', fallback="False")
357
    if special_atoms.lower() in turn_false_answers:
358
        special_atoms = []
359
    else:
360
        # Converts the string into a list of tuples
361
        lst_tple = [tuple(pair.replace("(", "").split()) for pair in
362
                    special_atoms.split(")")[:-1]]
363
        if len(lst_tple) == 0:
364
            logger.error(spec_at_err)
365
            raise ValueError(spec_at_err)
366
        for i, tup in enumerate(lst_tple):
367
            if not isinstance(tup, tuple) or len(tup) != 2:
368
                logger.error(spec_at_err)
369
                raise ValueError(spec_at_err)
370
            if tup[1].capitalize() not in chemical_symbols:
371
                elem_err = "The second element of the couple should be an " \
372
                           "actual element of the periodic table"
373
                logger.error(elem_err)
374
                raise ValueError(elem_err)
375
            if tup[0].capitalize() in chemical_symbols:
376
                elem_err = "The first element of the couple is already an " \
377
                           "actual element of the periodic table, "
378
                logger.error(elem_err)
379
                raise ValueError(elem_err)
380
            for j, tup2 in enumerate(lst_tple):
381
                if j <= i:
382
                    continue
383
                if tup2[0] == tup[0]:
384
                    label_err = f'You have specified the label {tup[0]} to ' \
385
                                f'more than one special atom'
386
                    logger.error(label_err)
387
                    raise ValueError(label_err)
388
        special_atoms = lst_tple
389
    return special_atoms
390

    
391

    
392
def get_potcar_dir():
393
    potcar_dir = dos_inp.get('Global', 'potcar_dir', fallback="False")
394
    if potcar_dir.lower() in turn_false_answers:
395
        return False
396
    elif not os.path.isdir(potcar_dir):
397
        err_msg = "'potcar_dir' must be either False or a directory."
398
        logger.error(err_msg)
399
        raise ValueError(err_msg)
400
    else:
401
        return potcar_dir
402

    
403

    
404
# Isolated
405

    
406
def get_isol_inp_file(code, potcar_dir):  # TODO allow spaces in path names
407
    inp_file_lst = dos_inp.get('Isolated', 'isol_inp_file').split()
408
    check_inp_files(inp_file_lst[0] if len(inp_file_lst) == 1 else inp_file_lst,
409
                    code, potcar_dir)
410
    return inp_file_lst[0] if len(inp_file_lst) == 1 else inp_file_lst
411

    
412

    
413
def get_molec_file():
414
    molec_file = dos_inp.get('Isolated', 'molec_file')
415
    if not os.path.isfile(molec_file):
416
        logger.error(f'File {molec_file} not found.')
417
        raise FileNotFoundError(f'File {molec_file} not found')
418
    return molec_file
419

    
420

    
421
def get_num_conformers():
422
    err_msg = num_error % ('num_conformers', 'positive integer')
423
    num_conformers = try_command(dos_inp.getint, [(ValueError, err_msg)],
424
                                 'Isolated', 'num_conformers', fallback=100)
425
    if num_conformers < 1:
426
        logger.error(err_msg)
427
        raise ValueError(err_msg)
428
    return num_conformers
429

    
430

    
431
def get_pre_opt():
432
    pre_opt_vals = ['uff', 'mmff'] + turn_false_answers
433
    check_expect_val(dos_inp.get('Isolated', 'pre_opt').lower(), pre_opt_vals)
434
    pre_opt = dos_inp.get('Isolated', 'pre_opt').lower()
435
    if pre_opt in turn_false_answers:
436
        return False
437
    else:
438
        return pre_opt
439

    
440

    
441
# Screening
442

    
443
def get_screen_inp_file(code, potcar_dir):  # TODO allow spaces in path names
444
    inp_file_lst = dos_inp.get('Screening', 'screen_inp_file').split()
445
    check_inp_files(inp_file_lst[0] if len(inp_file_lst) == 1 else inp_file_lst,
446
                    code, potcar_dir)
447
    return inp_file_lst[0] if len(inp_file_lst) == 1 else inp_file_lst
448

    
449

    
450
def get_surf_file():
451
    surf_file = dos_inp.get('Screening', 'surf_file')
452
    if not os.path.isfile(surf_file):
453
        logger.error(f'File {surf_file} not found.')
454
        raise FileNotFoundError(f'File {surf_file} not found')
455
    return surf_file
456

    
457

    
458
def get_sites():
459
    err_msg = 'The value of sites should be a list of atom numbers ' \
460
              '(ie. positive integers) or groups of atom numbers ' \
461
              'grouped by parentheses-like enclosers. \n' \
462
              'eg. 128,(135 138;141) 87 {45, 68}'
463
    # Convert the string into a list of lists
464
    sites = try_command(str2lst,
465
                        [(ValueError, err_msg), (AttributeError, err_msg)],
466
                        dos_inp.get('Screening', 'sites'))
467
    # Check all elements of the list (of lists) are positive integers
468
    for site in sites:
469
        if type(site) is list:
470
            for atom in site:
471
                if atom < 0:
472
                    logger.error(err_msg)
473
                    raise ValueError(err_msg)
474
        elif type(site) is int:
475
            if site < 0:
476
                logger.error(err_msg)
477
                raise ValueError(err_msg)
478
        else:
479
            logger.error(err_msg)
480
            raise ValueError(err_msg)
481

    
482
    return sites
483

    
484

    
485
def get_surf_ctrs2():
486
    err_msg = 'The value of surf_ctrs2 should be a list of atom numbers ' \
487
              '(ie. positive integers) or groups of atom numbers ' \
488
              'grouped by parentheses-like enclosers. \n' \
489
              'eg. 128,(135 138;141) 87 {45, 68}'
490
    # Convert the string into a list of lists
491
    surf_ctrs2 = try_command(str2lst,
492
                             [(ValueError, err_msg), (AttributeError, err_msg)],
493
                             dos_inp.get('Screening', 'surf_ctrs2'))
494
    # Check all elements of the list (of lists) are positive integers
495
    for ctr in surf_ctrs2:
496
        if type(ctr) is list:
497
            for atom in ctr:
498
                if atom < 0:
499
                    logger.error(err_msg)
500
                    raise ValueError(err_msg)
501
        elif type(ctr) is int:
502
            if ctr < 0:
503
                logger.error(err_msg)
504
                raise ValueError(err_msg)
505
        else:
506
            logger.error(err_msg)
507
            raise ValueError(err_msg)
508

    
509
    return surf_ctrs2
510

    
511

    
512
def get_molec_ctrs():
513
    err_msg = 'The value of molec_ctrs should be a list of atom' \
514
              ' numbers (ie. positive integers) or groups of atom ' \
515
              'numbers enclosed by parentheses-like characters. \n' \
516
              'eg. 128,(135 138;141) 87 {45, 68}'
517
    # Convert the string into a list of lists
518
    molec_ctrs = try_command(str2lst,
519
                             [(ValueError, err_msg),
520
                              (AttributeError, err_msg)],
521
                             dos_inp.get('Screening', 'molec_ctrs'))
522
    # Check all elements of the list (of lists) are positive integers
523
    for ctr in molec_ctrs:
524
        if isinstance(ctr, list):
525
            for atom in ctr:
526
                if atom < 0:
527
                    logger.error(err_msg)
528
                    raise ValueError(err_msg)
529
        elif isinstance(ctr, int):
530
            if ctr < 0:
531
                logger.error(err_msg)
532
                raise ValueError(err_msg)
533
        else:
534
            logger.error(err_msg)
535
            raise ValueError(err_msg)
536

    
537
    return molec_ctrs
538

    
539

    
540
def get_molec_ctrs2():
541
    err_msg = 'The value of molec_ctrs2 should be a list of atom ' \
542
              'numbers (ie. positive integers) or groups of atom ' \
543
              'numbers enclosed by parentheses-like characters. \n' \
544
              'eg. 128,(135 138;141) 87 {45, 68}'
545
    # Convert the string into a list of lists
546
    molec_ctrs2 = try_command(str2lst, [(ValueError, err_msg),
547
                                        (AttributeError, err_msg)],
548
                              dos_inp.get('Screening', 'molec_ctrs2'))
549

    
550
    # Check all elements of the list (of lists) are positive integers
551
    for ctr in molec_ctrs2:
552
        if isinstance(ctr, list):
553
            for atom in ctr:
554
                if atom < 0:
555
                    logger.error(err_msg)
556
                    raise ValueError(err_msg)
557
        elif isinstance(ctr, int):
558
            if ctr < 0:
559
                logger.error(err_msg)
560
                raise ValueError(err_msg)
561
        else:
562
            logger.error(err_msg)
563
            raise ValueError(err_msg)
564

    
565
    return molec_ctrs2
566

    
567

    
568
def get_molec_ctrs3():
569
    err_msg = 'The value of molec_ctrs3 should be a list of atom ' \
570
              'numbers (ie. positive integers) or groups of atom ' \
571
              'numbers enclosed by parentheses-like characters. \n' \
572
              'eg. 128,(135 138;141) 87 {45, 68}'
573
    # Convert the string into a list of lists
574
    molec_ctrs3 = try_command(str2lst, [(ValueError, err_msg),
575
                                        (AttributeError, err_msg)],
576
                              dos_inp.get('Screening', 'molec_ctrs3'))
577

    
578
    # Check all elements of the list (of lists) are positive integers
579
    for ctr in molec_ctrs3:
580
        if isinstance(ctr, list):
581
            for atom in ctr:
582
                if atom < 0:
583
                    logger.error(err_msg)
584
                    raise ValueError(err_msg)
585
        elif isinstance(ctr, int):
586
            if ctr < 0:
587
                logger.error(err_msg)
588
                raise ValueError(err_msg)
589
        else:
590
            logger.error(err_msg)
591
            raise ValueError(err_msg)
592

    
593
    return molec_ctrs3
594

    
595

    
596
def get_max_helic_angle():
597
    err_msg = "'max_helic_angle' must be a positive number in degrees"
598
    max_helic_angle = try_command(dos_inp.getfloat, [(ValueError, err_msg)],
599
                                  'Screening', 'max_helic_angle',
600
                                  fallback=180.0)
601
    if max_helic_angle < 0:
602
        logger.error(err_msg)
603
        raise ValueError(err_msg)
604

    
605
    return max_helic_angle
606

    
607

    
608
def get_select_magns():
609
    select_magns_vals = ['energy', 'moi']
610
    select_magns_str = dos_inp.get('Screening', 'select_magns',
611
                                   fallback='moi')
612
    select_magns_str.replace(',', ' ').replace(';', ' ')
613
    select_magns = select_magns_str.split(' ')
614
    select_magns = [m.lower() for m in select_magns]
615
    for m in select_magns:
616
        check_expect_val(m, select_magns_vals)
617
    return select_magns
618

    
619

    
620
def get_confs_per_magn():
621
    err_msg = num_error % ('confs_per_magn', 'positive integer')
622
    confs_per_magn = try_command(dos_inp.getint, [(ValueError, err_msg)],
623
                                 'Screening', 'confs_per_magn', fallback=2)
624
    if confs_per_magn <= 0:
625
        logger.error(err_msg)
626
        raise ValueError(err_msg)
627
    return confs_per_magn
628

    
629

    
630
def get_surf_norm_vect():
631
    err = "'surf_norm_vect' must be a 3 component vector, 'x', 'y' or 'z', " \
632
          "'auto' or 'asann'."
633
    cart_axes = {'x': [1.0, 0.0, 0.0], '-x': [-1.0, 0.0, 0.0],
634
                 'y': [0.0, 1.0, 0.0], '-y': [0.0, -1.0, 0.0],
635
                 'z': [0.0, 0.0, 1.0], '-z': [0.0, 0.0, -1.0]}
636
    surf_norm_vect_str = dos_inp.get('Screening', 'surf_norm_vect',
637
                                     fallback="auto").lower()
638
    if surf_norm_vect_str == "asann" or surf_norm_vect_str == "auto":
639
        return 'auto'
640
    if surf_norm_vect_str in cart_axes:
641
        return np.array(cart_axes[surf_norm_vect_str])
642
    surf_norm_vect = try_command(str2lst, [(ValueError, err)],
643
                                 surf_norm_vect_str, float)
644
    if len(surf_norm_vect) != 3:
645
        logger.error(err)
646
        raise ValueError(err)
647

    
648
    return np.array(surf_norm_vect) / np.linalg.norm(surf_norm_vect)
649

    
650

    
651
def get_adsorption_height():
652
    err_msg = num_error % ('adsorption_height', 'positive number')
653
    ads_height = try_command(dos_inp.getfloat, [(ValueError, err_msg)],
654
                             'Screening', 'adsorption_height', fallback=2.5)
655
    if ads_height <= 0:
656
        logger.error(err_msg)
657
        raise ValueError(err_msg)
658
    return ads_height
659

    
660

    
661
def get_set_angles():
662
    set_vals = ['euler', 'internal']
663
    check_expect_val(dos_inp.get('Screening', 'set_angles').lower(), set_vals)
664
    set_angles = dos_inp.get('Screening', 'set_angles',
665
                             fallback='euler').lower()
666
    return set_angles
667

    
668

    
669
def get_pts_per_angle():
670
    err_msg = num_error % ('sample_points_per_angle', 'positive integer')
671
    pts_per_angle = try_command(dos_inp.getint,
672
                                [(ValueError, err_msg)],
673
                                'Screening', 'sample_points_per_angle',
674
                                fallback=3)
675
    if pts_per_angle <= 0:
676
        logger.error(err_msg)
677
        raise ValueError(err_msg)
678
    return pts_per_angle
679

    
680

    
681
def get_max_structures():
682
    err_msg = num_error % ('max_structures', 'positive integer')
683
    max_structs = dos_inp.get('Screening', 'max_structures', fallback="False")
684
    if max_structs.lower() in turn_false_answers:
685
        return np.inf
686
    if try_command(int, [(ValueError, err_msg)], max_structs) <= 0:
687
        logger.error(err_msg)
688
        raise ValueError(err_msg)
689
    return int(max_structs)
690

    
691

    
692
def get_coll_thrsld():
693
    err_msg = num_error % ('collision_threshold', 'positive number')
694
    coll_thrsld_str = dos_inp.get('Screening', 'collision_threshold',
695
                                  fallback="False")
696
    if coll_thrsld_str.lower() in turn_false_answers:
697
        return False
698
    coll_thrsld = try_command(float, [(ValueError, err_msg)], coll_thrsld_str)
699

    
700
    if coll_thrsld <= 0:
701
        logger.error(err_msg)
702
        raise ValueError(err_msg)
703

    
704
    return coll_thrsld
705

    
706

    
707
def get_min_coll_height(norm_vect):
708
    err_msg = num_error % ('min_coll_height', 'decimal number')
709
    min_coll_height = dos_inp.get('Screening', 'min_coll_height',
710
                                  fallback="False")
711
    if min_coll_height.lower() in turn_false_answers:
712
        return False
713
    min_coll_height = try_command(float, [(ValueError, err_msg)],
714
                                  min_coll_height)
715
    cart_axes = [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0],
716
                 [-1.0, 0.0, 0.0], [0.0, -1.0, 0.0], [0.0, 0.0, -1.0]]
717
    err_msg = "'min_coll_height' option is only implemented for " \
718
              "'surf_norm_vect' to be one of the x, y or z axes. "
719
    if not isinstance(norm_vect, str) or norm_vect != 'auto':
720
        check_expect_val(norm_vect.tolist(), cart_axes, err_msg)
721
    return min_coll_height
722

    
723

    
724
def get_exclude_ads_ctr():
725
    err_msg = "exclude_ads_ctr must have a boolean value."
726
    exclude_ads_ctr = try_command(dos_inp.getboolean, [(ValueError, err_msg)],
727
                                  "Screening", "exclude_ads_ctr",
728
                                  fallback=False)
729
    return exclude_ads_ctr
730

    
731

    
732
def get_H_donor(spec_atoms):
733
    from ase.data import chemical_symbols
734
    err_msg = "The value of 'h_donor' must be either False, a chemical symbol "\
735
              "or an atom index"
736
    h_donor_str = dos_inp.get('Screening', 'h_donor', fallback="False")
737
    h_donor = []
738
    if h_donor_str.lower() in turn_false_answers:
739
        return False
740
    err = False
741
    for el in h_donor_str.split():
742
        try:
743
            h_donor.append(int(el))
744
        except ValueError:
745
            if el not in chemical_symbols + [nw_sym for pairs in spec_atoms
746
                                             for nw_sym in pairs]:
747
                err = True
748
            else:
749
                h_donor.append(el)
750
        finally:
751
            if err:
752
                logger.error(err_msg)
753
                ValueError(err_msg)
754
    return h_donor
755

    
756

    
757
def get_H_acceptor(spec_atoms):
758
    from ase.data import chemical_symbols
759
    err_msg = "The value of 'h_acceptor' must be either 'all', a chemical " \
760
              "symbol or an atom index."
761
    h_acceptor_str = dos_inp.get('Screening', 'h_acceptor', fallback="all")
762
    if h_acceptor_str.lower() == "all":
763
        return "all"
764
    h_acceptor = []
765
    err = False
766
    for el in h_acceptor_str.split():
767
        try:
768
            h_acceptor.append(int(el))
769
        except ValueError:
770
            if el not in chemical_symbols + [nw_sym for pairs in spec_atoms
771
                                             for nw_sym in pairs]:
772
                err = True
773
            else:
774
                h_acceptor.append(el)
775
        finally:
776
            if err:
777
                logger.error(err_msg)
778
                raise ValueError(err_msg)
779
    return h_acceptor
780

    
781

    
782
def get_use_molec_file():
783
    use_molec_file = dos_inp.get('Screening', 'use_molec_file',
784
                                 fallback='False')
785
    if use_molec_file.lower() in turn_false_answers:
786
        return False
787
    if not os.path.isfile(use_molec_file):
788
        logger.error(f'File {use_molec_file} not found.')
789
        raise FileNotFoundError(f'File {use_molec_file} not found')
790

    
791
    return use_molec_file
792

    
793

    
794
# Refinement
795

    
796
def get_refine_inp_file(code, potcar_dir):
797
    inp_file_lst = dos_inp.get('Refinement', 'refine_inp_file').split()
798
    check_inp_files(inp_file_lst[0] if len(inp_file_lst) == 1 else inp_file_lst,
799
                    code, potcar_dir)
800
    return inp_file_lst[0] if len(inp_file_lst) == 1 else inp_file_lst
801

    
802

    
803
def get_energy_cutoff():
804
    err_msg = num_error % ('energy_cutoff', 'positive decimal number')
805
    energy_cutoff = try_command(dos_inp.getfloat,
806
                                [(ValueError, err_msg)],
807
                                'Refinement', 'energy_cutoff', fallback=0.5)
808
    if energy_cutoff < 0:
809
        logger.error(err_msg)
810
        raise ValueError(err_msg)
811
    return energy_cutoff
812

    
813

    
814
# Read input parameters
815

    
816
def read_input(in_file):
817
    from modules.formats import adapt_format
818

    
819
    err_msg = False
820
    try:
821
        dos_inp.read(in_file)
822
    except MissingSectionHeaderError as e:
823
        logger.error('There are options in the input file without a Section '
824
                     'header.')
825
        err_msg = e
826
    except DuplicateOptionError as e:
827
        logger.error('There is an option in the input file that has been '
828
                     'specified more than once.')
829
        err_msg = e
830
    except Exception as e:
831
        err_msg = e
832
    else:
833
        err_msg = False
834
    finally:
835
        if isinstance(err_msg, BaseException):
836
            raise err_msg
837

    
838
    inp_vars = {}
839

    
840
    # Global
841
    if not dos_inp.has_section('Global'):
842
        logger.error(no_sect_err % 'Global')
843
        raise NoSectionError('Global')
844

    
845
    # Mandatory options
846
    # Checks whether the mandatory options 'run_type', 'code', etc. are present.
847
    glob_mand_opts = ['run_type', 'code', 'batch_q_sys']
848
    for opt in glob_mand_opts:
849
        if not dos_inp.has_option('Global', opt):
850
            logger.error(no_opt_err % (opt, 'Global'))
851
            raise NoOptionError(opt, 'Global')
852

    
853
    # Gets which sections are to be carried out
854
    isolated, screening, refinement = get_run_type()
855
    inp_vars['isolated'] = isolated
856
    inp_vars['screening'] = screening
857
    inp_vars['refinement'] = refinement
858
    inp_vars['code'] = get_code()
859
    inp_vars['batch_q_sys'] = get_batch_q_sys()
860

    
861
    # Dependent options:
862
    if inp_vars['batch_q_sys']:
863
        inp_vars['max_jobs'] = get_max_jobs()
864
        if inp_vars['batch_q_sys'] != 'local':
865
            if not dos_inp.has_option('Global', 'subm_script'):
866
                logger.error(no_opt_err % ('subm_script', 'Global'))
867
                raise NoOptionError('subm_script', 'Global')
868
            inp_vars['subm_script'] = get_subm_script()
869
    if inp_vars['code'] == "vasp":
870
        inp_vars['potcar_dir'] = get_potcar_dir()
871

    
872
    # Facultative options (Default/Fallback value present)
873
    inp_vars['pbc_cell'] = get_pbc_cell()
874
    inp_vars['project_name'] = get_project_name()
875
    # inp_vars['relaunch_err'] = get_relaunch_err()
876
    inp_vars['special_atoms'] = get_special_atoms()
877

    
878
    # Isolated
879
    if isolated:
880
        if not dos_inp.has_section('Isolated'):
881
            logger.error(no_sect_err % 'Isolated')
882
            raise NoSectionError('Isolated')
883
        # Mandatory options
884
        # Checks whether the mandatory options are present.
885
        iso_mand_opts = ['isol_inp_file', 'molec_file']
886
        for opt in iso_mand_opts:
887
            if not dos_inp.has_option('Isolated', opt):
888
                logger.error(no_opt_err % (opt, 'Isolated'))
889
                raise NoOptionError(opt, 'Isolated')
890
        inp_vars['isol_inp_file'] = get_isol_inp_file(inp_vars['code'],
891
                                                      inp_vars['potcar_dir'])
892
        inp_vars['molec_file'] = get_molec_file()
893

    
894
        # Checks for PBC
895
        atms = adapt_format('ase', inp_vars['molec_file'],
896
                            inp_vars['special_atoms'])
897
        if inp_vars['code'] == 'vasp' and np.linalg.det(atms.cell) == 0.0 \
898
                and inp_vars['pbc_cell'] is False:
899
            err_msg = "When running calculations with 'VASP', the PBC cell " \
900
                      "should be provided either implicitely inside " \
901
                      "'molec_file' or by setting the 'pbc_cell' option."
902
            logger.error(err_msg)
903
            raise ValueError(err_msg)
904
        elif inp_vars['pbc_cell'] is False and np.linalg.det(atms.cell) != 0.0:
905
            inp_vars['pbc_cell'] = atms.cell
906
            logger.info(f"Obtained pbc_cell from '{inp_vars['molec_file']}' "
907
                        f"file.")
908
        elif (atms.cell != 0).any() and not np.allclose(inp_vars['pbc_cell'],
909
                                                        atms.cell):
910
            logger.warning("'molec_file' has an implicit cell defined "
911
                           f"different than 'pbc_cell'.\n"
912
                           f"'molec_file' = {atms.cell}.\n"
913
                           f"'pbc_cell' = {inp_vars['pbc_cell']}).\n"
914
                           "'pbc_cell' value will be used.")
915

    
916
        # Facultative options (Default/Fallback value present)
917
        inp_vars['num_conformers'] = get_num_conformers()
918
        inp_vars['pre_opt'] = get_pre_opt()
919

    
920
    # Screening
921
    if screening:
922
        if not dos_inp.has_section('Screening'):
923
            logger.error(no_sect_err % 'Screening')
924
            raise NoSectionError('Screening')
925
        # Mandatory options:
926
        # Checks whether the mandatory options are present.
927
        screen_mand_opts = ['screen_inp_file', 'surf_file', 'sites',
928
                            'molec_ctrs']
929
        for opt in screen_mand_opts:
930
            if not dos_inp.has_option('Screening', opt):
931
                logger.error(no_opt_err % (opt, 'Screening'))
932
                raise NoOptionError(opt, 'Screening')
933
        inp_vars['screen_inp_file'] = get_screen_inp_file(inp_vars['code'],
934
                                                          inp_vars[
935
                                                              'potcar_dir'])
936
        inp_vars['surf_file'] = get_surf_file()
937
        inp_vars['sites'] = get_sites()
938
        inp_vars['molec_ctrs'] = get_molec_ctrs()
939

    
940
        # Checks for PBC
941
        # Checks for PBC
942
        atms = adapt_format('ase', inp_vars['surf_file'],
943
                            inp_vars['special_atoms'])
944
        if inp_vars['code'] == 'vasp' and np.linalg.det(atms.cell) == 0.0 \
945
                and inp_vars['pbc_cell'] is False:
946
            err_msg = "When running calculations with 'VASP', the PBC cell " \
947
                      "should be provided either implicitely inside " \
948
                      "'surf_file' or by setting the 'pbc_cell' option."
949
            logger.error(err_msg)
950
            raise ValueError(err_msg)
951
        elif inp_vars['pbc_cell'] is False and np.linalg.det(atms.cell) != 0.0:
952
            inp_vars['pbc_cell'] = atms.cell
953
            logger.info(f"Obtained pbc_cell from '{inp_vars['surf_file']}' "
954
                        f"file.")
955
        elif (atms.cell != 0).any() and not np.allclose(inp_vars['pbc_cell'],
956
                                                        atms.cell):
957
            logger.warning("'surf_file' has an implicit cell defined "
958
                           f"different than 'pbc_cell'.\n"
959
                           f"'surf_file' = {atms.cell}.\n"
960
                           f"'pbc_cell' = {inp_vars['pbc_cell']}).\n"
961
                           "'pbc_cell' value will be used.")
962

    
963
        # Facultative options (Default value present)
964
        inp_vars['select_magns'] = get_select_magns()
965
        inp_vars['confs_per_magn'] = get_confs_per_magn()
966
        inp_vars['surf_norm_vect'] = get_surf_norm_vect()
967
        inp_vars['adsorption_height'] = get_adsorption_height()
968
        inp_vars['set_angles'] = get_set_angles()
969
        inp_vars['sample_points_per_angle'] = get_pts_per_angle()
970
        inp_vars['collision_threshold'] = get_coll_thrsld()
971
        inp_vars['min_coll_height'] = get_min_coll_height(
972
            inp_vars['surf_norm_vect'])
973
        if inp_vars['min_coll_height'] is False \
974
                and inp_vars['collision_threshold'] is False:
975
            logger.warning("Collisions are deactivated: Overlapping of "
976
                           "adsorbate and surface is possible")
977
        inp_vars['exclude_ads_ctr'] = get_exclude_ads_ctr()
978
        inp_vars['h_donor'] = get_H_donor(inp_vars['special_atoms'])
979
        inp_vars['max_structures'] = get_max_structures()
980
        inp_vars['use_molec_file'] = get_use_molec_file()
981

    
982
        # Options depending on the value of others
983
        if inp_vars['set_angles'] == "internal":
984
            internal_opts = ['molec_ctrs2', 'molec_ctrs3', 'surf_ctrs2',
985
                             'max_helic_angle']
986
            for opt in internal_opts:
987
                if not dos_inp.has_option('Screening', opt):
988
                    logger.error(no_opt_err % (opt, 'Screening'))
989
                    raise NoOptionError(opt, 'Screening')
990
            inp_vars['max_helic_angle'] = get_max_helic_angle()
991
            inp_vars['molec_ctrs2'] = get_molec_ctrs2()
992
            inp_vars['molec_ctrs3'] = get_molec_ctrs3()
993
            inp_vars['surf_ctrs2'] = get_surf_ctrs2()
994
            if len(inp_vars["molec_ctrs2"]) != len(inp_vars['molec_ctrs']) \
995
                    or len(inp_vars["molec_ctrs3"]) != \
996
                    len(inp_vars['molec_ctrs']) \
997
                    or len(inp_vars['surf_ctrs2']) != len(inp_vars['sites']):
998
                err_msg = "'molec_ctrs' 'molec_ctrs2' and 'molec_ctrs3' must " \
999
                          "have the same number of indices "
1000
                logger.error(err_msg)
1001
                raise ValueError(err_msg)
1002

    
1003
        if inp_vars['h_donor'] is False:
1004
            inp_vars['h_acceptor'] = False
1005
        else:
1006
            inp_vars['h_acceptor'] = get_H_acceptor(inp_vars['special_atoms'])
1007

    
1008
    # Refinement
1009
    if refinement:
1010
        if not dos_inp.has_section('Refinement'):
1011
            logger.error(no_sect_err % 'Refinement')
1012
            raise NoSectionError('Refinement')
1013
        # Mandatory options
1014
        # Checks whether the mandatory options are present.
1015
        ref_mand_opts = ['refine_inp_file']
1016
        for opt in ref_mand_opts:
1017
            if not dos_inp.has_option('Refinement', opt):
1018
                logger.error(no_opt_err % (opt, 'Refinement'))
1019
                raise NoOptionError(opt, 'Refinement')
1020
        inp_vars['refine_inp_file'] = get_refine_inp_file(inp_vars['code'],
1021
                                                          inp_vars[
1022
                                                              'potcar_dir'])
1023

    
1024
        # Facultative options (Default value present)
1025
        inp_vars['energy_cutoff'] = get_energy_cutoff()
1026
        # end energy_cutoff
1027

    
1028
    return_vars_str = "\n\t".join([str(key) + ": " + str(value)
1029
                                   for key, value in inp_vars.items()])
1030
    logger.info(f'Correctly read {in_file} parameters:'
1031
                f' \n\n\t{return_vars_str}\n')
1032

    
1033
    return inp_vars
1034

    
1035

    
1036
if __name__ == "__main__":
1037
    import sys
1038

    
1039
    print(read_input(sys.argv[1]))