Statistiques
| Branche: | Tag: | Révision :

dockonsurf / modules / calculation.py @ 0db30d07

Historique | Voir | Annoter | Télécharger (5,16 ko)

1
import os
2
import logging
3

    
4
# Module-wide logger; all DockOnSurf modules share the 'DockOnSurf' logger.
logger = logging.getLogger('DockOnSurf')
5

    
6

    
7
def check_bak(file_name):
    """Checks if a file already exists and backs it up if so.

    When *file_name* (file or directory) exists it is renamed to the first
    free name of the form ``<file_name>.bakN`` and a warning is logged.
    Nothing happens when the name is free.

    @param file_name: file to be checked if exists
    """
    candidate = file_name
    attempt = 0
    # Probe .bak1, .bak2, ... until a non-existing name is found.
    while os.path.isdir(candidate) or os.path.isfile(candidate):
        attempt += 1
        candidate = candidate.split(".bak")[0] + f".bak{attempt}"
    if attempt > 0:
        os.rename(file_name, candidate)
        logger.warning(f"'{file_name}' already present. Backed it up to "
                       f"{candidate}")
20

    
21

    
22
def prep_cp2k(inp_file, run_type, atms_list):
    """Prepares the directories to run isolated calculation with CP2K.

    Parses the CP2K input, makes sure a coordinate-file name is set (writing
    a corrected copy of the input in the working directory if it was not),
    and then creates one ``{run_type}/conf_{i}`` directory per configuration
    containing the input file and the coordinates.

    @param inp_file: CP2K Input file to run the calculations with.
    @param run_type: Type of calculation. 'isolated', 'screening' or
        'refinement'
    @param atms_list: list of ase.Atoms objects to run the calculation of.
    @return: None
    """
    from shutil import copy
    import ase.io
    from pycp2k import CP2K
    cp2k = CP2K()
    cp2k.parse(inp_file)
    force_eval = cp2k.CP2K_INPUT.FORCE_EVAL_list[0]
    if force_eval.SUBSYS.TOPOLOGY.Coord_file_name is None:
        logger.warning("'COORD_FILE_NAME' not specified on CP2K input. Using\n"
                       "default name 'coord.xyz'. A new CP2K input file with "
                       "the 'COORD_FILE_NAME' variable is created. If there\n"
                       "is a name conflict the old file will be backed up")
        force_eval.SUBSYS.TOPOLOGY.Coord_file_name = 'coord.xyz'
        # Fix: removed leftover debug print of the input-file name.
        # Rewrite the input (with COORD_FILE_NAME set) in the current
        # directory, backing up any clashing file first.
        inp_base = os.path.basename(inp_file)
        check_bak(inp_base)
        cp2k.write_input_file(inp_base)

    coord_file = force_eval.SUBSYS.TOPOLOGY.Coord_file_name

    # Creating and setting up directories for every configuration.
    for i, conf in enumerate(atms_list):
        os.mkdir(f'{run_type}/conf_{i}')
        copy(inp_file, f'{run_type}/conf_{i}/')
        ase.io.write(f'{run_type}/conf_{i}/{coord_file}', conf)
54

    
55

    
56
def get_jobs_status_sge(job_ids):  # TODO more elegant
    """Returns a list of job status for a list of job ids.

    Codes: 'r' = running (qstat output contains the usage line),
    'q' = queued (non-empty qstat output), 'f' = finished (empty output).

    @param job_ids: list of all jobs to be checked their status.
    @return: list of status for every job.
    """
    from gridtk.tools import qstat
    running_marker = 'usage         1'

    def one_status(job):
        info = qstat(job)
        if running_marker in info:
            return 'r'
        if len(info) > 0:
            return 'q'
        return 'f'

    return [one_status(job) for job in job_ids]
73

    
74

    
75
def sub_sge(run_type, sub_script, max_qw, name):
    """Submits jobs to the sge queuing system with the provided script

    Submits one job per ``conf_*`` directory under *run_type*, throttling so
    that at most *max_qw* jobs sit waiting in the queue, then blocks until
    every submitted job has finished.

    @param run_type: Type of calculation. 'isolated', 'screening', 'refinement'
    @param sub_script: script for the job submission.
    @param max_qw: Maximum number of simultaneous jobs waiting to be executed.
    @param name: name of the project
    """
    from shutil import copy
    from time import sleep
    from gridtk.tools import qsub  # TODO CHANGE TO DRMAA
    subm_jobs = []
    init_dir = os.getcwd()
    for conf in os.listdir(run_type):
        i = conf.split('_')[1]
        # Throttle submission: wait while too many jobs are queued ('q').
        while get_jobs_status_sge(subm_jobs).count('q') >= max_qw:
            sleep(30)
        copy(sub_script, f"{run_type}/{conf}")
        os.chdir(f"{run_type}/{conf}")
        job_name = f'{name[:6].capitalize()}{run_type[:3].capitalize()}{i}'
        subm_jobs.append(qsub(sub_script, name=job_name))
        os.chdir(init_dir)

    # Bug fix: this wait-for-completion block was indented inside the for
    # loop, so the function waited for ALL previously submitted jobs to
    # finish after every single submission. It must run once, after the loop.
    logger.info('All jobs have been submitted, waiting them to finish')
    while not all(stat == 'f' for stat in get_jobs_status_sge(subm_jobs)):
        sleep(30)
    logger.info('All jobs have finished')
102

    
103

    
104
def sub_lsf(run_type, sub_script, max_qw, name):
    """Submit jobs to the LSF queuing system. Not implemented yet (stub).

    Mirrors the sub_sge() interface so run_calc() can dispatch on
    'batch_q_sys' uniformly.

    @param run_type: Type of calculation. 'isolated', 'screening', 'refinement'
    @param sub_script: script for the job submission.
    @param max_qw: Maximum number of simultaneous jobs waiting to be executed.
    @param name: name of the project
    """
    pass
106

    
107

    
108
def run_calc(run_type, inp_vars, atms_list):
    """Directs the calculation run according to the provided arguments.

    Validates *run_type*, prepares a fresh run directory (backing up any
    existing one), dispatches the preparation to the configured code and
    the submission to the configured batch queuing system.

    @param run_type: Type of calculation. 'isolated', 'screening' or
    'refinement'
    @param inp_vars: Calculation parameters from input file.
    @param atms_list: List of ase.Atoms objects containing the sets of atoms
    aimed to run the calculations of.
    """
    valid_run_types = ['isolated', 'screening', 'refinement']
    err_msg = f"'run_type' must be one of the following: {valid_run_types}"
    if not isinstance(run_type, str) or run_type.lower() not in valid_run_types:
        logger.error(err_msg)
        raise ValueError(err_msg)

    logger.info(f"Running {run_type} calculation with {inp_vars['code']} on "
                f"{inp_vars['batch_q_sys']}")
    # Back up any previous run directory before creating a fresh one.
    check_bak(run_type)
    os.mkdir(run_type)

    if inp_vars['code'] == 'cp2k':
        prep_cp2k(inp_vars['isol_inp_file'], run_type, atms_list)
    # elif: inp_vars['code'] == 'Other codes here'

    batch_q_sys = inp_vars['batch_q_sys']
    if batch_q_sys == 'sge':
        sub_sge(run_type, inp_vars['subm_script'], inp_vars['max_qw'],
                inp_vars['project_name'])
    elif batch_q_sys == 'lsf':  # TODO implement lsf
        sub_lsf(run_type, inp_vars['subm_script'], inp_vars['max_qw'],
                inp_vars['project_name'])
    elif batch_q_sys == 'local':  # TODO implement local
        pass  # run_local
    elif batch_q_sys == 'none':
        pass