|
1
|
|
|
import re |
|
2
|
|
|
from collections import OrderedDict |
|
3
|
|
|
|
|
4
|
|
|
# Validation patterns for known PBS resources.
# Raw strings are used so that `\d` reaches the regex engine verbatim instead
# of being treated as an (invalid) string escape sequence.
# None of the patterns is anchored at the end.

# One to four colon-terminated numeric fields, e.g. the "01:00:" of "01:00:00".
regex_walltime = re.compile(r"(\d+:){1,4}")

# {<node_count>|<hostname>}[:ppn=<ppn>][:gpus=<gpu>][:<property>[:<property>]...]
regex_resource_nodes = re.compile(r"[a-zA-Z0-9]+(:ppn=\d+)?(:gpus=\d+)?(:[a-zA-Z0-9]+)*")

# An integer optionally followed by a size unit (b, kb, mb, gb or tb).
regex_resource_pmem = re.compile(r"[0-9]+(b|kb|mb|gb|tb)?")
|
7
|
|
|
|
|
8
|
|
|
|
|
9
|
|
|
class PBS(object):
    """ Offers functionalities to manage a PBS file.

    For more information about the PBS file format see:
    `http://docs.adaptivecomputing.com/suite/8-0/basic/help.htm#topics/torque/2-jobs/requestingRes.htm?TocPath=TORQUE Resource Manager|Submitting and managing jobs|Job submission|_____3`

    Parameters
    ----------
    queue_name : str
        name of the queue on which commands will be executed
    walltime : str
        maximum time allocated to execute every commands (DD:HH:MM:SS)

    Raises
    ------
    ValueError
        if `queue_name` is empty or `walltime` has an unknown format
    """

    def __init__(self, queue_name, walltime):
        if not queue_name:
            raise ValueError("Queue's name must be provided.")

        self.queue_name = queue_name
        self.modules = []
        self.prolog = []
        self.commands = []
        self.epilog = []

        # Resources are rendered as `#PBS -l <name>=<value>` lines.
        self.resources = OrderedDict()
        self.add_resources(walltime=walltime)

        # Options are rendered as `#PBS <flag> [<value>]` lines.
        self.options = OrderedDict()
        self.add_options(q=queue_name)

        # Options rendered as `#SBATCH ...` lines.
        self.sbatch_options = OrderedDict()

        # Declares that all environment variables in the qsub command's environment are to be exported to the batch job.
        self.add_options(V="")

    def add_options(self, **options):
        """ Adds options to this PBS file.

        Parameters
        ----------
        **options : dict
            each key is the name of a PBS option (see `Options`)

        Options
        -------
        *A* : account_string
            Defines the account string associated with the job.
        *N* : name (up to 64 characters)
            Declares a name for the job. It must consist of printable,
            non white space characters with the first character alphabetic.

        Raises
        ------
        ValueError
            if the value given for option `N` is longer than 64 characters
        """
        for option_name, option_value in options.items():
            # If known option, validate it.
            if option_name.strip('-') == 'N':
                # The limit applies to the job name (the value), not to the
                # option's own name.
                if len(str(option_value)) > 64:
                    raise ValueError("Maximum number of characters for the name is: 64")

            self.options["-" + option_name] = option_value

    def add_sbatch_options(self, **options):
        """ Adds sbatch options to this PBS file.

        Single-character names are stored with a `-` prefix, longer names
        with a `--` prefix.

        Parameters
        ----------
        **options : dict
            each key is the name of a SBATCH option
        """
        for option_name, option_value in options.items():
            dash = "-" if len(option_name) == 1 else "--"
            self.sbatch_options[dash + option_name] = option_value

    def add_resources(self, **resources):
        """ Adds resources to this PBS file.

        Parameters
        ----------
        **resources : dict
            each key is the name of a PBS resource (see `Resources`)

        Resources
        ---------
        *nodes* : nodes={<node_count>|<hostname>}[:ppn=<ppn>][:gpus=<gpu>][:<property>[:<property>]...]
            Specifies how many and what type of nodes to use
            **nodes={<node_count>|<hostname>}**: type of nodes
            **ppn=#**: Number of process per node requested for this job
            **gpus=#**: Number of GPUs per node requested for this job
            **property**: A string specifying a node's feature
        *pmem*: pmem=[0-9]+(b|kb|mb|gb|tb)
            Specifies the maximum amount of physical memory used by any single process of the job.
        *walltime*: walltime=DD:HH:MM:SS
            Specifies the maximum time allocated to the job.

        Raises
        ------
        ValueError
            if the value of a known resource (`nodes`, `pmem`, `walltime`)
            does not match its expected format
        """
        for resource_name, resource_value in resources.items():
            # If known resource, validate it; unknown resources are stored as is.
            if resource_name == 'nodes':
                if regex_resource_nodes.match(str(resource_value)) is None:
                    raise ValueError("Unknown format for PBS resource: nodes")
            elif resource_name == 'pmem':
                if regex_resource_pmem.match(str(resource_value)) is None:
                    raise ValueError("Unknown format for PBS resource: pmem")
            elif resource_name == 'walltime':
                if regex_walltime.match(str(resource_value)) is None:
                    raise ValueError("Unknown format for PBS resource: walltime (dd:hh:mm:ss)")

            self.resources[resource_name] = resource_value

    def add_modules_to_load(self, *modules):
        """ Adds modules to load prior to execute the job on a node.

        Parameters
        ----------
        *modules : list of str
            each string represents the name of the module to load
        """
        self.modules += modules

    def add_to_prolog(self, *code):
        """ Adds the code to be executed before the commands.

        Parameters
        ----------
        *code : list of str
            Each string holds the code to be executed before the commands
        """
        self.prolog += code

    def add_commands(self, *commands):
        """ Adds commands to execute on a node.

        Parameters
        ----------
        *commands : list of str
            each string represents a command that is part of this job
        """
        self.commands += commands

    def add_to_epilog(self, *code):
        """ Adds the code to be executed after the commands.

        Parameters
        ----------
        *code : list of str
            Each string holds the code to be executed after the commands
        """
        self.epilog += code

    def save(self, filename):
        """ Saves this PBS job to a file.

        The saved script starts its prolog with a `PBS_FILENAME=<filename>`
        line; that line is only part of the written file and is removed from
        `self.prolog` afterwards, even if rendering fails.

        Parameters
        ----------
        filename : str
            specified where to save this PBS file
        """
        # Render before opening the file so that a rendering error neither
        # truncates an existing file nor leaves the temporary line in the prolog.
        self.prolog.insert(0, "PBS_FILENAME=%s" % filename)
        try:
            content = str(self)
        finally:
            self.prolog.pop(0)

        with open(filename, 'w') as pbs_file:
            pbs_file.write(content)

    def __str__(self):
        """ Renders this job as the text of a bash script.

        Sections are emitted in this order: shebang, `#PBS` options,
        `#PBS -l` resources, `#SBATCH` options, module loads, prolog,
        commands and epilog.
        """
        pbs = ["#!/bin/bash"]

        for option_name, option_value in self.options.items():
            if option_value == "":
                # Flag without a value (e.g. `-V`).
                pbs.append("#PBS {0}".format(option_name))
            else:
                pbs.append("#PBS {0} {1}".format(option_name, option_value))

        for resource_name, resource_value in self.resources.items():
            pbs.append("#PBS -l {0}={1}".format(resource_name, resource_value))

        for option_name, option_value in self.sbatch_options.items():
            # Long options use `--name=value`, short ones `-n value`.
            if option_name.startswith('--'):
                pbs.append("#SBATCH {0}={1}".format(option_name, option_value))
            else:
                pbs.append("#SBATCH {0} {1}".format(option_name, option_value))

        pbs.append("\n# Modules #")
        for module in self.modules:
            pbs.append("module load " + module)

        pbs.append("\n# Prolog #")
        pbs.extend(self.prolog)

        pbs.append("\n# Commands #")
        pbs.extend("{command}".format(command=command) for command in self.commands)

        pbs.append("\n# Epilog #")
        pbs.extend(self.epilog)

        return "\n".join(pbs)
|
202
|
|
|
|