| Metric | Value |
| --- | --- |
| Conditions | 20 |
| Total Lines | 83 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 3 |
| Bugs | 0 |
| Features | 0 |
Small methods make your code easier to understand, especially when combined with a good name. Moreover, if a method is small, finding a good name for it is usually much easier.
For example, if you find yourself adding comments inside a method's body, that is usually a sign that the commented part should be extracted into a new method, with the comment serving as a starting point for the new method's name.
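As a minimal sketch of this comment-driven Extract Method (the `OrderReport` example below is hypothetical and not taken from the code under review):

```python
class OrderReport:
    def __init__(self, amounts):
        self.amounts = amounts

    # Before: a comment marks the block that wants to be its own method.
    def print_report_before_refactoring(self):
        total = sum(self.amounts)
        # print the banner
        print("*" * 30)
        print("Order report")
        print("*" * 30)
        print("Total: {}".format(total))

    # After Extract Method: the comment has become the method name.
    def print_report(self):
        total = sum(self.amounts)
        self.print_banner()
        print("Total: {}".format(total))

    def print_banner(self):
        print("*" * 30)
        print("Order report")
        print("*" * 30)
```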
Commonly applied refactorings include:

- Extract Method, as in the comment-driven example above.
- If many parameters or temporary variables are present, Replace Method with Method Object: move the method into a dedicated class so the temporaries become fields, then split the body up there (see the sketch below).
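A minimal sketch of Replace Method with Method Object, using a hypothetical `Order`/`PriceCalculator` pair (none of these names come from the code under review):

```python
class PriceCalculator:
    """Method object: the former local variables of Order.price()
    become fields, so a long body can be split into small named methods."""

    def __init__(self, order):
        self.order = order
        self.base_price = 0.0
        self.discount = 0.0

    def compute(self):
        self.base_price = self.order.quantity * self.order.item_price
        self.discount = self.base_price * 0.05 if self.order.quantity > 100 else 0.0
        return self.base_price - self.discount


class Order:
    def __init__(self, quantity, item_price):
        self.quantity = quantity
        self.item_price = item_price

    def price(self):
        # The original long method now just delegates to the method object.
        return PriceCalculator(self).compute()
```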
Complex classes, such as the MultiGPUTrainer behind the train() method shown below, often do a lot of different things. To break such a class down, you need to identify a cohesive component within it. A common way to find such a component is to look for fields and methods that share the same prefix or suffix.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate and is often quicker to apply.
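For illustration, here is the textbook shape of Extract Class with a hypothetical `Person`/`TelephoneNumber` pair rather than the trainer code; the shared `office_` prefix is exactly the kind of hint mentioned above:

```python
class TelephoneNumber:
    """Extracted class: office_area_code and office_number shared a prefix,
    hinting that they form a cohesive component."""

    def __init__(self, area_code, number):
        self.area_code = area_code
        self.number = number

    def __str__(self):
        return "({}) {}".format(self.area_code, self.number)


class Person:
    def __init__(self, name, area_code, number):
        self.name = name
        # Was: self.office_area_code and self.office_number
        self.office_telephone = TelephoneNumber(area_code, number)

    def telephone_number(self):
        return str(self.office_telephone)
```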
```python
#!/usr/bin/env python
# ... (imports and earlier definitions omitted)

def train(self, train_set, valid_set=None, test_set=None, train_size=None):
    """
    Train the model in multi-GPU environment.
    """
    server_port = self._port
    param_map = self.create_param_map()
    # Initialize the worker
    worker = Worker(control_port=server_port)
    if self.config.learning_rate:
        worker.send_req({'init_schedule': self._schedule_params})
    self.sync_hyperparams(worker.send_req('sync_hyperparams')['sync_hyperparams'])
    easgd_alpha = worker.send_req('get_easgd_alpha')
    worker.init_shared_params(param_map.values(), param_sync_rule=EASGD(easgd_alpha))
    worker.copy_to_local()
    worker.send_req({
        "set_names": None,
        "training_names": self.training_names,
        "evaluation_names": self.evaluation_names
    })
    # Load all training batches, consume vast memory here
    self.logger.info("started process {}".format(os.getpid()))
    self.logger.info("(proc {}) load training data".format(os.getpid()))
    train_batches = list(train_set)
    network_callback = bool(self.network.training_callbacks)
    trainer_callback = bool(self._iter_callbacks)
    while True:
        resp = worker.send_req('next')
        if resp == 'stop':
            break
        elif resp == 'wait':
            time.sleep(1)
        elif resp == 'get_num_batches':
            worker.send_req({'get_num_batches_done': len(train_batches)})
        elif 'eval' in resp:
            # Evaluate on the validation and test sets and report the costs
            self.best_cost = resp['best_valid_cost']
            worker.copy_to_local()
            valid_costs = None
            test_costs = None
            if valid_set:
                self._run_valid(self.epoch, valid_set)
                self.fix_costs()
                valid_costs = self.last_run_costs
            if test_set:
                self._run_test(self.epoch, test_set)
                self.fix_costs()
                test_costs = self.last_run_costs
            worker.send_req({
                "eval_done": None,
                "valid_costs": valid_costs,
                "test_costs": test_costs,
                "auto_save": self.config.auto_save
            })
        elif 'valid' in resp:
            # Dry-run validation only
            self.best_cost = resp['best_valid_cost']
            worker.copy_to_local()
            if valid_set:
                self._run_valid(self.epoch, valid_set, dry_run=True)
                self.fix_costs()
            worker.send_req({
                "valid_done": None,
                "valid_costs": self.last_run_costs,
                "auto_save": self.config.auto_save
            })
        elif 'train' in resp:
            # Train on the assigned batches, then synchronize shared parameters
            batch_ids = resp['train']
            batch_costs = [[] for _ in self.training_names]
            for batch_id in batch_ids:
                x = train_batches[batch_id]
                cost_x = self.learn(*x)
                for i, cost in enumerate(cost_x):
                    batch_costs[i].append(cost)
                self.last_cost = cost_x[0]
            if network_callback:
                self.network.training_callback()
            if trainer_callback:
                for func in self._iter_callbacks:
                    func(self)
            worker.sync_params(synchronous=True)
            worker.send_req({'train_done': None, 'costs': [float(np.mean(c)) for c in batch_costs]})
        elif 'sync_hyperparams' in resp:
            self.sync_hyperparams(resp['sync_hyperparams'])
    worker.close()
    return []
```
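As a concrete first step for train() itself, each branch of the command loop could be extracted into its own method. The helper below is a sketch of the 'train' branch only, reusing names that already appear in the listing; it is not a drop-in patch:

```python
def _train_on_batches(self, worker, train_batches, batch_ids,
                      network_callback, trainer_callback):
    """Handle one 'train' command: run the given batches and report mean costs."""
    batch_costs = [[] for _ in self.training_names]
    for batch_id in batch_ids:
        x = train_batches[batch_id]
        cost_x = self.learn(*x)
        for i, cost in enumerate(cost_x):
            batch_costs[i].append(cost)
        self.last_cost = cost_x[0]
    if network_callback:
        self.network.training_callback()
    if trainer_callback:
        for func in self._iter_callbacks:
            func(self)
    worker.sync_params(synchronous=True)
    worker.send_req({'train_done': None,
                     'costs': [float(np.mean(c)) for c in batch_costs]})
```

With one such helper per command ('eval', 'valid', 'train', 'sync_hyperparams'), the while loop in train() shrinks to a small dispatcher, which moves most of the conditional logic counted above out of the method and gives each piece a descriptive name.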