1
|
|
|
#!/usr/bin/env python2 |
2
|
|
|
# -*- coding: utf-8 -*- |
3
|
|
|
|
4
|
|
|
import os |
5
|
|
|
import sys |
6
|
|
|
import signal |
7
|
|
|
import argparse |
8
|
|
|
import subprocess |
9
|
|
|
import logging |
10
|
|
|
import time as t |
11
|
|
|
|
12
|
|
|
from smartdispatch import utils |
13
|
|
|
from smartdispatch.command_manager import CommandManager |
14
|
|
|
|
15
|
|
|
|
16
|
|
|
def parse_arguments(argv=None):
    """Parse and validate the worker's command-line arguments.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``.

    Returns:
        The ``argparse.Namespace`` holding ``commands_filename`` and
        ``logs_dir``.

    Raises:
        SystemExit: Raised through ``parser.error`` when
            ``commands_filename`` is not an existing file or ``logs_dir``
            is not an existing directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('commands_filename', type=str, help='File containing all commands to execute.')
    parser.add_argument('logs_dir', type=str, help="Folder where to put commands' stdout and stderr.")
    args = parser.parse_args(argv)

    # Check for invalid arguments
    if not os.path.isfile(args.commands_filename):
        parser.error("Invalid file path. Specify path to a file containing commands.")

    if not os.path.isdir(args.logs_dir):
        parser.error("You need to specify the folder path where to put commands' stdout and stderr.")

    return args
30
|
|
|
|
31
|
|
|
|
32
|
|
|
def _build_log_header(command, job_id, node_name, resumed):
    """Return the banner written to a command's logs before it is run.

    Args:
        command: The command line about to be executed.
        job_id: Value of the PBS_JOBID environment variable (or 'undefined').
        node_name: Value of the HOSTNAME environment variable (or 'undefined').
        resumed: True when the log already has content, in which case the
            banner says "Resumed" and is preceded by a blank line.

    Returns:
        The two-line header string (date/job/node line, then command line).
    """
    # Only the timestamp goes through strftime; the job and node names are
    # inserted afterwards so a '%' in either cannot be read as a directive.
    timestamp = t.strftime("%Y-%m-%d %H:%M:%S")
    banner = "## SMART-DISPATCH - {action} on: {timestamp} - In job: {job_id} - On nodes: {node_name} ##\n".format(
        action="Resumed" if resumed else "Started",
        timestamp=timestamp,
        job_id=job_id,
        node_name=node_name)
    if resumed:
        banner = "\n" + banner

    return banner + "## SMART-DISPATCH - Command: " + command + '\n'


def _run_command(command, logs_dir):
    """Run `command` through the shell, appending its output to its log files.

    The log files are `<uid>.out` and `<uid>.err` inside `logs_dir`, where
    `uid` comes from `utils.generate_uid_from_string(command)`.

    Args:
        command: The command line to execute.
        logs_dir: Folder in which the stdout/stderr logs are kept.

    Returns:
        The value returned by `subprocess.call` for the command.
    """
    uid = utils.generate_uid_from_string(command)
    stdout_filename = os.path.join(logs_dir, uid + ".out")
    stderr_filename = os.path.join(logs_dir, uid + ".err")

    # Get job and node ID
    job_id = os.environ.get('PBS_JOBID', 'undefined')
    node_name = os.environ.get('HOSTNAME', 'undefined')

    with open(stdout_filename, 'a') as stdout_file, open(stderr_filename, 'a') as stderr_file:
        # The initial position of an append-mode stream is implementation
        # defined, so move to the end explicitly before asking whether the
        # log already has content.
        stdout_file.seek(0, os.SEEK_END)
        resumed = stdout_file.tell() > 0  # Not the first line in the log file.

        header = _build_log_header(command, job_id, node_name, resumed)

        # Flush our header before the child starts writing to the same files
        # so the banner always precedes the command's own output.
        stdout_file.write(header)
        stdout_file.flush()
        stderr_file.write(header)
        stderr_file.flush()

        # NOTE(review): commands are executed with shell=True, so the
        # commands file must come from a trusted source.
        return subprocess.call(command, stdout=stdout_file, stderr=stderr_file, shell=True)


def main():
    """Fetch commands from the command manager and run them one at a time.

    Loops until `CommandManager.get_command_to_run` returns None. Each
    command is executed by `_run_command` and its exit status is reported
    with `set_running_command_as_finished`. On SIGTERM the command currently
    being processed (if any) is handed to `set_running_command_as_pending`
    and the process exits with status 0.
    """
    # Necessary if we want 'logging.info' to appear in stderr.
    logging.root.setLevel(logging.INFO)

    args = parse_arguments()

    command_manager = CommandManager(args.commands_filename)

    # Handle TERM signal gracefully by sending running commands back to
    # the list of pending commands.
    def sigterm_handler(signum, frame):
        # Ignore repeated TERM signals; the first one already handles cleanup.
        if sigterm_handler.triggered:
            return
        sigterm_handler.triggered = True

        if sigterm_handler.command is not None:
            command_manager.set_running_command_as_pending(sigterm_handler.command)

        sys.exit(0)

    # State shared with the handler is kept as attributes on the function.
    sigterm_handler.triggered = False
    sigterm_handler.command = None
    signal.signal(signal.SIGTERM, sigterm_handler)

    while True:
        command = command_manager.get_command_to_run()
        sigterm_handler.command = command

        if command is None:
            break

        error_code = _run_command(command, args.logs_dir)

        command_manager.set_running_command_as_finished(command, error_code)
        # The command is done: forget it so that a TERM received while the
        # next command is being fetched does not hand a finished command
        # back to set_running_command_as_pending.
        sigterm_handler.command = None
86
|
|
|
|
87
|
|
|
# Script entry point: start the worker loop only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
89
|
|
|
|