Completed
Push — master (981c14...d395f4) by Marc-Alexandre
created 01:05

JobGenerator — rating A

Complexity
    Total Complexity    21

Size/Duplication
    Total Lines         110
    Duplicated Lines    0 %

Metric    Value
wmc       21
dl        0
loc       110
rs        10

7 Methods

Rating  Name                              Duplication  Size  Complexity
A       __init__()                        0            11    1
A       _add_cluster_specific_rules()     0            2     1
A       specify_account_name_from_env()   0            7     3
B       add_pbs_flags()                   0            18    5
A       specify_account_name_from_file()  0            9     4
A       write_pbs_files()                 0            15    2
B       _generate_base_pbs()              0            28    5
from __future__ import absolute_import

import os
import re
from smartdispatch.pbs import PBS
from smartdispatch import utils


def job_generator_factory(queue, commands, command_params={}, cluster_name=None, base_path="./"):
    if cluster_name == "guillimin":
        return GuilliminJobGenerator(queue, commands, command_params, base_path)
    elif cluster_name == "mammouth":
        return MammouthJobGenerator(queue, commands, command_params, base_path)
    elif cluster_name == "helios":
        return HeliosJobGenerator(queue, commands, command_params, base_path)
    elif cluster_name == "hades":
        return HadesJobGenerator(queue, commands, command_params, base_path)

    return JobGenerator(queue, commands, command_params, base_path)
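
# Hypothetical usage sketch (not part of this file): it assumes `queue` is a
# smartdispatch Queue-like object exposing name, walltime, nb_cores_per_node,
# nb_gpus_per_node and modules, and that the commands are plain shell strings.
#
#     commands = ["python train.py --seed %d" % seed for seed in range(8)]
#     generator = job_generator_factory(queue, commands,
#                                       command_params={'nb_cores_per_command': 2},
#                                       cluster_name="helios")
#     pbs_filenames = generator.write_pbs_files(pbs_dir="./pbs")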


class JobGenerator(object):

    """ Offers functionality to generate PBS files for a given queue.

    Parameters
    ----------
    queue : `Queue` instance
        queue on which commands will be executed
    commands : list of str
        commands to put in PBS files
    command_params : dict
        information about the commands (e.g. nb_cores_per_command, nb_gpus_per_command)
    """

    def __init__(self, queue, commands, command_params={}, base_path="./"):
        self.commands = commands
        self.queue = queue
        # Log file pattern for stdout/stderr; {ext} is filled in later with 'out' or 'err'.
        self.job_log_filename = '"{base_path}/logs/job/"$PBS_JOBID".{{ext}}"'.format(base_path=base_path)

        self.nb_cores_per_command = command_params.get('nb_cores_per_command', 1)
        self.nb_gpus_per_command = command_params.get('nb_gpus_per_command', 1)
        #self.mem_per_command = command_params.get('mem_per_command', 0.0)

        self.pbs_list = self._generate_base_pbs()
        self._add_cluster_specific_rules()

    def _add_cluster_specific_rules(self):
        # No-op by default; subclasses override this to patch the generated PBS files.
        pass

    def add_pbs_flags(self, flags):
        resources = {}
        options = {}

        for flag in flags:
            if flag.startswith('-l'):
                resource = flag[2:]
                split = resource.find('=')
                resources[resource[:split]] = resource[split+1:]
            elif flag.startswith('-'):
                options[flag[1:2]] = flag[2:]
            else:
                raise ValueError("Invalid PBS flag ({})".format(flag))

        for pbs in self.pbs_list:
            pbs.add_resources(**resources)
            pbs.add_options(**options)
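
    # Illustrative parse of hypothetical flags (not taken from the source): with
    # flags = ["-lwalltime=12:00:00", "-qtest"], the loop in add_pbs_flags yields
    # resources == {"walltime": "12:00:00"} and options == {"q": "test"}, which are
    # then forwarded to every PBS object in self.pbs_list.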

    def _generate_base_pbs(self):
        """ Generates PBS files allowing the execution of every command on the given queue. """
        nb_commands_per_node = self.queue.nb_cores_per_node // self.nb_cores_per_command

        if self.queue.nb_gpus_per_node > 0 and self.nb_gpus_per_command > 0:
            nb_commands_per_node = min(nb_commands_per_node, self.queue.nb_gpus_per_node // self.nb_gpus_per_command)

        pbs_files = []
        # Distribute the jobs equally among the PBS files and generate those files.
        for i, commands in enumerate(utils.chunks(self.commands, n=nb_commands_per_node)):
            pbs = PBS(self.queue.name, self.queue.walltime)

            # TODO: Move the add_options into the JobManager once created.
            pbs.add_options(o=self.job_log_filename.format(ext='out'), e=self.job_log_filename.format(ext='err'))

            # Set resource: nodes
            resource = "1:ppn={ppn}".format(ppn=len(commands) * self.nb_cores_per_command)
            if self.queue.nb_gpus_per_node > 0:
                resource += ":gpus={gpus}".format(gpus=len(commands) * self.nb_gpus_per_command)

            pbs.add_resources(nodes=resource)

            pbs.add_modules_to_load(*self.queue.modules)
            pbs.add_commands(*commands)

            pbs_files.append(pbs)

        return pbs_files
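
    # Worked example with hypothetical numbers: on a queue offering 16 cores and 8 GPUs
    # per node, with nb_cores_per_command=2 and nb_gpus_per_command=1, the capacity is
    # min(16 // 2, 8 // 1) = 8 commands per node; 20 commands therefore produce three
    # PBS files requesting nodes=1:ppn=16:gpus=8, 1:ppn=16:gpus=8 and 1:ppn=8:gpus=4.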

    def write_pbs_files(self, pbs_dir="./"):
        """ Writes the PBS files allowing the execution of every command on the given queue.

        Parameters
        ----------
        pbs_dir : str
            folder in which to save the PBS files
        """
        pbs_filenames = []
        for i, pbs in enumerate(self.pbs_list):
            pbs_filename = os.path.join(pbs_dir, 'job_commands_' + str(i) + '.sh')
            pbs.save(pbs_filename)
            pbs_filenames.append(pbs_filename)

        return pbs_filenames
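
    # For instance (hypothetical call), with two generated PBS objects and
    # pbs_dir="./pbs", this returns ['./pbs/job_commands_0.sh', './pbs/job_commands_1.sh']
    # after saving both files.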

    def specify_account_name_from_env(self, environment_variable_name):
        if environment_variable_name not in os.environ:
            raise ValueError("Undefined environment variable: ${}. Please provide your account name!".format(environment_variable_name))

        # The account name is the basename of the resolved path the variable points to.
        account_name = os.path.basename(os.path.realpath(os.getenv(environment_variable_name)))
        for pbs in self.pbs_list:
            pbs.add_options(A=account_name)

    def specify_account_name_from_file(self, rapid_filename):
        if not os.path.isfile(rapid_filename):
            raise ValueError("Account name file {} does not exist. Please provide your account name!".format(rapid_filename))

        with open(rapid_filename, 'r') as rapid_file:
            account_name = rapid_file.read().strip()

        for pbs in self.pbs_list:
            pbs.add_options(A=account_name)


class MammouthJobGenerator(JobGenerator):

    def _add_cluster_specific_rules(self):
        # On the mp2 partition, force a single ppn slot per node.
        if self.queue.name.endswith("@mp2"):
            for pbs in self.pbs_list:
                pbs.resources['nodes'] = re.sub("ppn=[0-9]+", "ppn=1", pbs.resources['nodes'])


class HadesJobGenerator(JobGenerator):

    def _add_cluster_specific_rules(self):
        for pbs in self.pbs_list:
            # Request one ppn slot per GPU and drop the explicit gpus= specification.
            gpus = re.match(".*gpus=([0-9]+)", pbs.resources['nodes']).group(1)
            pbs.resources['nodes'] = re.sub("ppn=[0-9]+", "ppn={}".format(gpus), pbs.resources['nodes'])
            pbs.resources['nodes'] = re.sub(":gpus=[0-9]+", "", pbs.resources['nodes'])
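
# Example of the Hades rewrite above (hypothetical resource string): "1:ppn=4:gpus=2"
# becomes "1:ppn=2", i.e. one ppn slot per requested GPU and no gpus= specification.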


class GuilliminJobGenerator(JobGenerator):

    def _add_cluster_specific_rules(self):
        return self.specify_account_name_from_env('HOME_GROUP')


# https://wiki.calculquebec.ca/w/Ex%C3%A9cuter_une_t%C3%A2che#tab=tab6
class HeliosJobGenerator(JobGenerator):

    def _add_cluster_specific_rules(self):
        self.specify_account_name_from_file(os.path.join(os.environ['HOME'], ".default_rap"))

        for pbs in self.pbs_list:
            # Remove forbidden ppn option. Default is 2 cores per gpu.
            pbs.resources['nodes'] = re.sub(":ppn=[0-9]+", "", pbs.resources['nodes'])